fastfind.c revision 18829
1234287Sdim/*
2234287Sdim * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
3234287Sdim * Copyright (c) 1989, 1993
4234287Sdim *	The Regents of the University of California.  All rights reserved.
5234287Sdim *
6234287Sdim * This code is derived from software contributed to Berkeley by
7234287Sdim * James A. Woods.
8234287Sdim *
9234287Sdim * Redistribution and use in source and binary forms, with or without
10234287Sdim * modification, are permitted provided that the following conditions
11234287Sdim * are met:
12234287Sdim * 1. Redistributions of source code must retain the above copyright
13234287Sdim *    notice, this list of conditions and the following disclaimer.
14234287Sdim * 2. Redistributions in binary form must reproduce the above copyright
15249423Sdim *    notice, this list of conditions and the following disclaimer in the
16234287Sdim *    documentation and/or other materials provided with the distribution.
17234287Sdim * 3. All advertising materials mentioning features or use of this software
18249423Sdim *    must display the following acknowledgement:
19234287Sdim *	This product includes software developed by the University of
20234287Sdim *	California, Berkeley and its contributors.
21234287Sdim * 4. Neither the name of the University nor the names of its contributors
22234287Sdim *    may be used to endorse or promote products derived from this software
23234287Sdim *    without specific prior written permission.
24234287Sdim *
25234287Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26234287Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27234287Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28234287Sdim * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29234287Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30276479Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31234287Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32234287Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33234287Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34234287Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35234287Sdim * SUCH DAMAGE.
36234287Sdim *
37234287Sdim * $Id: fastfind.c,v 1.1 1996/08/31 23:14:52 wosch Exp $
38234287Sdim */
39234287Sdim
40234287Sdim
41234287Sdim#ifndef _LOCATE_STATISTIC_
42234287Sdim#define _LOCATE_STATISTIC_
43234287Sdim
44234287Sdimvoid
45234287Sdimstatistic (fp, path_fcodes)
46234287Sdim	FILE *fp;               /* open database */
47234287Sdim	char *path_fcodes;  	/* for error message */
48234287Sdim{
49234287Sdim	register int lines, chars, size, big;
50234287Sdim	register u_char *p, *s;
51234287Sdim	register int c;
52234287Sdim	int count;
53234287Sdim	u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
54234287Sdim
55234287Sdim	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
56234287Sdim		p[c] = check_bigram_char(getc(fp));
57234287Sdim		s[c] = check_bigram_char(getc(fp));
58234287Sdim	}
59234287Sdim
60234287Sdim	lines = chars = big = 0;
61234287Sdim	size = NBG + NBG;
62234287Sdim
63234287Sdim	for (c = getc(fp), count = 0; c != EOF; size++) {
64234287Sdim		if (c == SWITCH) {
65234287Sdim			count += getwf(fp) - OFFSET;
66234287Sdim			size += sizeof(int);
67234287Sdim		} else
68234287Sdim			count += c - OFFSET;
69234287Sdim
70234287Sdim		for (p = path + count; (c = getc(fp)) > SWITCH; size++)
71234287Sdim			if (c < PARITY)
72234287Sdim				p++;
73234287Sdim			else {
74234287Sdim				big++;
75276479Sdim				p += 2;
76276479Sdim			}
77276479Sdim
78234287Sdim		p++;
79234287Sdim		lines++;
80234287Sdim		chars += (p - path);
81234287Sdim	}
82234287Sdim
83234287Sdim	(void)printf("\nDatabase: %s\n", path_fcodes);
84234287Sdim	(void)printf("Compression: Front: %2.2f%%, ",
85234287Sdim		     (float)(100 * (size + big)) / chars);
86234287Sdim	(void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size);
87234287Sdim	(void)printf("Total: %2.2f%%\n", (float)(100 * size) / chars);
88234287Sdim	(void)printf("Filenames: %d, ", lines);
89234287Sdim	(void)printf("Chars: %d\n", chars);
90234287Sdim	(void)printf("Database size: %d, ", size);
91234287Sdim	(void)printf("Bigram chars: %d\n", big);
92234287Sdim
93234287Sdim}
94234287Sdim#endif /* _LOCATE_STATISTIC_ */
95234287Sdim
96234287Sdim
97234287Sdimvoid
98234287Sdim#ifdef FF_MMAP
99234287Sdim
100234287Sdim
101234287Sdim#ifdef FF_ICASE
102234287Sdimfastfind_mmap_icase
103276479Sdim#else
104243830Sdimfastfind_mmap
105234287Sdim#endif
106234287Sdim(pathpart, paddr, len, database)
107243830Sdim	char *pathpart; 	/* search string */
108243830Sdim	caddr_t paddr;  	/* mmap pointer */
109234287Sdim	int len;        	/* length of database */
110234287Sdim	char *database; 	/* for error message */
111234287Sdim
112234287Sdim
113243830Sdim#else /* MMAP */
114243830Sdim
115243830Sdim
116234287Sdim#ifdef FF_ICASE
117234287Sdimfastfind_icase
118234287Sdim#else /* !FF_ICASE */
119234287Sdimfastfind
120234287Sdim#endif /* FF_ICASE */
121234287Sdim
122234287Sdim(fp, pathpart, database)
123234287Sdim	FILE *fp;               /* open database */
124234287Sdim	char *pathpart;		/* search string */
125234287Sdim	char *database;		/* for error message */
126234287Sdim
127234287Sdim
128234287Sdim#endif /* MMAP */
129243830Sdim
130234287Sdim{
131234287Sdim	register u_char *p, *s, *patend, *q, *foundchar;
132243830Sdim	register int c, cc;
133234287Sdim	int count, found, globflag;
134234287Sdim	u_char *cutoff;
135234287Sdim	u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
136234287Sdim
137234287Sdim#ifdef FF_ICASE
138243830Sdim	/* use a lookup table for case insensitive search */
139243830Sdim	u_char table[UCHAR_MAX];
140243830Sdim
141234287Sdim	tolower_word(pathpart);
142234287Sdim#endif
143234287Sdim
144234287Sdim	/* init bigram table */
145234287Sdim#ifdef FF_MMAP
146276479Sdim	if (len < (2*NBG)) {
147276479Sdim		(void)fprintf(stderr, "database to small: %s\n", database);
148276479Sdim		exit(1);
149234287Sdim	}
150234287Sdim
151234287Sdim	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
152234287Sdim		p[c] = check_bigram_char(*paddr++);
153234287Sdim		s[c] = check_bigram_char(*paddr++);
154234287Sdim	}
155234287Sdim#else
156288943Sdim	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
157288943Sdim		p[c] = check_bigram_char(getc(fp));
158288943Sdim		s[c] = check_bigram_char(getc(fp));
159234287Sdim	}
160234287Sdim#endif
161234287Sdim
162234287Sdim	/* find optimal (last) char for searching */
163234287Sdim	for (p = pathpart; *p != '\0'; p++)
164234287Sdim		if (index(LOCATE_REG, *p) != NULL)
165234287Sdim			break;
166234287Sdim
167276479Sdim	if (*p == '\0')
168234287Sdim		globflag = 0;
169234287Sdim	else
170234287Sdim		globflag = 1;
171234287Sdim
172234287Sdim	p = pathpart;
173234287Sdim	patend = patprep(p);
174234287Sdim	cc = *patend;
175234287Sdim
176#ifdef FF_ICASE
177	/* set patend char to true */
178	table[TOLOWER(*patend)] = 1;
179	table[toupper(*patend)] = 1;
180#endif
181
182
183	/* main loop */
184	found = count = 0;
185	foundchar = 0;
186
187#ifdef FF_MMAP
188	for (c = (u_char)*paddr++; len-- > 0; ) {
189#else
190	for (c = getc(fp); c != EOF; ) {
191#endif
192
193		/* go forward or backward */
194		if (c == SWITCH) { /* big step, an integer */
195#ifdef FF_MMAP
196			count += getwm(paddr) - OFFSET;
197			len -= INTSIZE; paddr += INTSIZE;
198#else
199			count +=  getwf(fp) - OFFSET;
200#endif
201		} else {	   /* slow step, =< 14 chars */
202			count += c - OFFSET;
203		}
204
205		/* overlay old path */
206		p = path + count;
207		foundchar = p - 1;
208#ifdef FF_MMAP
209		for (; (c = (u_char)*paddr++) > SWITCH; len--)
210#else
211		for (; (c = getc(fp)) > SWITCH; )
212#endif
213
214			if (c < PARITY) {
215#ifdef FF_ICASE
216				if (table[c])
217#else
218				if (c == cc)
219#endif
220					foundchar = p;
221				*p++ = c;
222			}
223			else {
224				/* bigrams are parity-marked */
225				TO7BIT(c);
226
227#ifndef FF_ICASE
228				if (bigram1[c] == cc ||
229				    bigram2[c] == cc)
230#else
231
232					if (table[bigram1[c]] ||
233					    table[bigram2[c]])
234#endif
235						foundchar = p + 1;
236
237				*p++ = bigram1[c];
238				*p++ = bigram2[c];
239			}
240
241
242		if (found) {                     /* previous line matched */
243			cutoff = path;
244			*p-- = '\0';
245			foundchar = p;
246		} else if (foundchar >= path + count) { /* a char matched */
247			*p-- = '\0';
248			cutoff = path + count;
249		} else                           /* nothing to do */
250			continue;
251
252		found = 0;
253		for (s = foundchar; s >= cutoff; s--) {
254			if (*s == cc
255#ifdef FF_ICASE
256			    || TOLOWER(*s) == cc
257#endif
258			    ) {	/* fast first char check */
259				for (p = patend - 1, q = s - 1; *p != '\0';
260				     p--, q--)
261					if (*q != *p
262#ifdef FF_ICASE
263					    && TOLOWER(*q) != *p
264#endif
265					    )
266						break;
267				if (*p == '\0') {   /* fast match success */
268					found = 1;
269					if (!globflag || !fnmatch(pathpart, path, 0)) {
270						if (f_silent)
271							counter++;
272						else if (f_limit) {
273							counter++;
274							if (f_limit >= counter)
275								(void)puts(path);
276							else  {
277								(void)fprintf(stderr, "[show only %d lines]\n", counter - 1);
278								exit(0);
279							}
280						} else
281							(void)puts(path);
282					}
283					break;
284				}
285			}
286		}
287	}
288}
289