1/*
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org>
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * James A. Woods.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36
37#ifndef _LOCATE_STATISTIC_
38#define _LOCATE_STATISTIC_
39
40void
41statistic (FILE *fp, char *path_fcodes)
42{
43	long lines, chars, size, size_nbg, big, zwerg, umlaut;
44	u_char *p, *s;
45	int c;
46	int count, longest_path;
47	int error = 0;
48	u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX];
49
50	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
51		p[c] = check_bigram_char(getc(fp));
52		s[c] = check_bigram_char(getc(fp));
53	}
54
55	lines = chars = big = zwerg = umlaut = longest_path = 0;
56	size = NBG + NBG;
57
58	for (c = getc(fp), count = 0; c != EOF; size++) {
59		if (c == SWITCH) {
60			count += getwf(fp) - OFFSET;
61			size += sizeof(int);
62			zwerg++;
63		} else
64			count += c - OFFSET;
65
66		if (count < 0 || count >= LOCATE_PATH_MAX) {
67			/* stop on error and display the statstics anyway */
68			warnx("corrupted database: %s %d", path_fcodes, count);
69			error = 1;
70			break;
71		}
72
73		for (p = path + count; (c = getc(fp)) > SWITCH; size++)
74			if (c < PARITY) {
75				if (c == UMLAUT) {
76					c = getc(fp);
77					size++;
78					umlaut++;
79				}
80				p++;
81			} else {
82				/* bigram char */
83				big++;
84				p += 2;
85			}
86
87		p++;
88		lines++;
89		chars += (p - path);
90		if ((p - path) > longest_path)
91			longest_path = p - path;
92	}
93
94	/* size without bigram db */
95	size_nbg = size - (2 * NBG);
96
97	(void)printf("\nDatabase: %s\n", path_fcodes);
98	(void)printf("Compression: Front: %2.2f%%, ", chars > 0 ?  (size_nbg + big) / (chars / (float)100) : 0);
99	(void)printf("Bigram: %2.2f%%, ", big > 0 ? (size_nbg - big) / (size_nbg / (float)100) : 0);
100	/* incl. bigram db overhead */
101	(void)printf("Total: %2.2f%%\n", chars > 0 ?  size / (chars / (float)100) : 0);
102	(void)printf("Filenames: %ld, ", lines);
103	(void)printf("Characters: %ld, ", chars);
104	(void)printf("Database size: %ld\n", size);
105	(void)printf("Bigram characters: %ld, ", big);
106	(void)printf("Integers: %ld, ", zwerg);
107	(void)printf("8-Bit characters: %ld\n", umlaut);
108	printf("Longest path: %d\n", longest_path > 0 ? longest_path - 1 : 0);
109
110	/* non zero exit on corrupt database */
111	if (error)
112		exit(error);
113}
114#endif /* _LOCATE_STATISTIC_ */
115
116extern	char	separator;
117
118void
119#ifdef FF_MMAP
120
121
122#ifdef FF_ICASE
123fastfind_mmap_icase
124#else
125fastfind_mmap
126#endif /* FF_ICASE */
127(char *pathpart, caddr_t paddr, off_t len, char *database)
128
129
130#else /* MMAP */
131
132
133#ifdef FF_ICASE
134fastfind_icase
135#else
136fastfind
137#endif /* FF_ICASE */
138
139(FILE *fp, char *pathpart, char *database)
140
141
142#endif /* MMAP */
143
144{
145	u_char *p, *s, *patend, *q, *foundchar;
146	int c, cc;
147	int count, found, globflag;
148	u_char *cutoff;
149	u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX + 2];
150
151#ifdef FF_ICASE
152	/* use a lookup table for case insensitive search */
153	u_char table[UCHAR_MAX + 1];
154
155	tolower_word(pathpart);
156#endif /* FF_ICASE*/
157
158	/* init bigram table */
159#ifdef FF_MMAP
160	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
161		p[c] = check_bigram_char(*paddr++);
162		s[c] = check_bigram_char(*paddr++);
163	}
164#else
165	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
166		p[c] = check_bigram_char(getc(fp));
167		s[c] = check_bigram_char(getc(fp));
168	}
169#endif /* FF_MMAP */
170
171	/* find optimal (last) char for searching */
172	for (p = pathpart; *p != '\0'; p++)
173		if (strchr(LOCATE_REG, *p) != NULL)
174			break;
175
176	if (*p == '\0')
177		globflag = 0;
178	else
179		globflag = 1;
180
181	p = pathpart;
182	patend = patprep(p);
183	cc = *patend;
184
185#ifdef FF_ICASE
186	/* set patend char to true */
187        for (c = 0; c < UCHAR_MAX + 1; c++)
188                table[c] = 0;
189
190	table[TOLOWER(*patend)] = 1;
191	table[toupper(*patend)] = 1;
192#endif /* FF_ICASE */
193
194
195	/* main loop */
196	found = count = 0;
197	foundchar = 0;
198
199#ifdef FF_MMAP
200	c = (u_char)*paddr++;
201	len--;
202
203	for (; len > 0; ) {
204#else
205	c = getc(fp);
206	for (; c != EOF; ) {
207#endif /* FF_MMAP */
208
209		/* go forward or backward */
210		if (c == SWITCH) { /* big step, an integer */
211#ifdef FF_MMAP
212			if (len < sizeof(int))
213				errx(1, "corrupted database: %s", database);
214
215			count += getwm(paddr) - OFFSET;
216			len -= INTSIZE;
217			paddr += INTSIZE;
218#else
219			count +=  getwf(fp) - OFFSET;
220#endif /* FF_MMAP */
221		} else {	   /* slow step, =< 14 chars */
222			count += c - OFFSET;
223		}
224
225		if (count < 0 || count >= LOCATE_PATH_MAX)
226			errx(1, "corrupted database: %s %d", database, count);
227
228		/* overlay old path */
229		p = path + count;
230		foundchar = p - 1;
231
232#ifdef FF_MMAP
233		for (; len > 0;) {
234			c = (u_char)*paddr++;
235		        len--;
236#else
237		for (;;) {
238			c = getc(fp);
239#endif /* FF_MMAP */
240			/*
241			 * == UMLAUT: 8 bit char followed
242			 * <= SWITCH: offset
243			 * >= PARITY: bigram
244			 * rest:      single ascii char
245			 *
246			 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram
247			 */
248			if (c < PARITY) {
249				if (c <= UMLAUT) {
250					if (c == UMLAUT) {
251#ifdef FF_MMAP
252						c = (u_char)*paddr++;
253						len--;
254#else
255						c = getc(fp);
256#endif /* FF_MMAP */
257
258					} else
259						break; /* SWITCH */
260				}
261#ifdef FF_ICASE
262				if (table[c])
263#else
264				if (c == cc)
265#endif /* FF_ICASE */
266					foundchar = p;
267				*p++ = c;
268			}
269			else {
270				/* bigrams are parity-marked */
271				TO7BIT(c);
272
273#ifndef FF_ICASE
274				if (bigram1[c] == cc ||
275				    bigram2[c] == cc)
276#else
277
278					if (table[bigram1[c]] ||
279					    table[bigram2[c]])
280#endif /* FF_ICASE */
281						foundchar = p + 1;
282
283				*p++ = bigram1[c];
284				*p++ = bigram2[c];
285			}
286
287			if (p - path >= LOCATE_PATH_MAX)
288				errx(1, "corrupted database: %s %td", database, p - path);
289
290		}
291
292		if (found) {                     /* previous line matched */
293			cutoff = path;
294			*p-- = '\0';
295			foundchar = p;
296		} else if (foundchar >= path + count) { /* a char matched */
297			*p-- = '\0';
298			cutoff = path + count;
299		} else                           /* nothing to do */
300			continue;
301
302		found = 0;
303		for (s = foundchar; s >= cutoff; s--) {
304			if (*s == cc
305#ifdef FF_ICASE
306			    || TOLOWER(*s) == cc
307#endif /* FF_ICASE */
308			    ) {	/* fast first char check */
309				for (p = patend - 1, q = s - 1; *p != '\0';
310				     p--, q--)
311					if (*q != *p
312#ifdef FF_ICASE
313					    && TOLOWER(*q) != *p
314#endif /* FF_ICASE */
315					    )
316						break;
317				if (*p == '\0') {   /* fast match success */
318					found = 1;
319					if (!globflag ||
320#ifndef FF_ICASE
321					    !fnmatch(pathpart, path, 0))
322#else
323					    !fnmatch(pathpart, path,
324						     FNM_CASEFOLD))
325#endif /* !FF_ICASE */
326					{
327						if (f_silent)
328							counter++;
329						else if (f_limit) {
330							counter++;
331							if (f_limit >= counter)
332								(void)printf("%s%c",path,separator);
333							else
334								errx(0, "[show only %ld lines]", counter - 1);
335						} else
336							(void)printf("%s%c",path,separator);
337					}
338					break;
339				}
340			}
341		}
342	}
343}
344