fastfind.c revision 27574
1116742Ssam/*
2116904Ssam * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
3186904Ssam * Copyright (c) 1989, 1993
4116742Ssam *	The Regents of the University of California.  All rights reserved.
5116742Ssam *
6116742Ssam * This code is derived from software contributed to Berkeley by
7116742Ssam * James A. Woods.
8116742Ssam *
9116742Ssam * Redistribution and use in source and binary forms, with or without
10116904Ssam * modification, are permitted provided that the following conditions
11116904Ssam * are met:
12116904Ssam * 1. Redistributions of source code must retain the above copyright
13116904Ssam *    notice, this list of conditions and the following disclaimer.
14116742Ssam * 2. Redistributions in binary form must reproduce the above copyright
15116904Ssam *    notice, this list of conditions and the following disclaimer in the
16116904Ssam *    documentation and/or other materials provided with the distribution.
17116904Ssam * 3. All advertising materials mentioning features or use of this software
18116904Ssam *    must display the following acknowledgement:
19116904Ssam *	This product includes software developed by the University of
20116904Ssam *	California, Berkeley and its contributors.
21116904Ssam * 4. Neither the name of the University nor the names of its contributors
22116904Ssam *    may be used to endorse or promote products derived from this software
23116904Ssam *    without specific prior written permission.
24116904Ssam *
25116742Ssam * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26116742Ssam * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27116742Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28116742Ssam * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29116742Ssam * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30116742Ssam * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31116742Ssam * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32116742Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33178354Ssam * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34116742Ssam * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35116742Ssam * SUCH DAMAGE.
36191746Sthompsa *
37116742Ssam * $Id: fastfind.c,v 1.9 1997/02/22 19:55:45 peter Exp $
38295126Sglebius */
39116742Ssam
40287197Sglebius
41116742Ssam#ifndef _LOCATE_STATISTIC_
42283529Sglebius#define _LOCATE_STATISTIC_
43283529Sglebius
44116742Ssamvoid
45257176Sglebiusstatistic (fp, path_fcodes)
46178354Ssam	FILE *fp;               /* open database */
47116742Ssam	char *path_fcodes;  	/* for error message */
48178354Ssam{
49116742Ssam	register int lines, chars, size, big, zwerg;
50116742Ssam	register u_char *p, *s;
51116742Ssam	register int c;
52178354Ssam	int count, umlaut;
53190391Ssam	u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
54190391Ssam
55190391Ssam	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
56206358Srpaulo		p[c] = check_bigram_char(getc(fp));
57116742Ssam		s[c] = check_bigram_char(getc(fp));
58116742Ssam	}
59116742Ssam
60178955Ssam	lines = chars = big = zwerg = umlaut = 0;
61178955Ssam	size = NBG + NBG;
62178955Ssam
63178955Ssam	for (c = getc(fp), count = 0; c != EOF; size++) {
64178955Ssam		if (c == SWITCH) {
65178955Ssam			count += getwf(fp) - OFFSET;
66178955Ssam			size += sizeof(int);
67178955Ssam			zwerg++;
68178955Ssam		} else
69188782Ssam			count += c - OFFSET;
70188782Ssam
71178955Ssam		for (p = path + count; (c = getc(fp)) > SWITCH; size++)
72178955Ssam			if (c < PARITY) {
73116742Ssam				if (c == UMLAUT) {
74178957Ssam					c = getc(fp);
75178957Ssam					size++;
76178957Ssam					umlaut++;
77178957Ssam				}
78178957Ssam				p++;
79178957Ssam			} else {
80178957Ssam				/* bigram char */
81178957Ssam				big++;
82195618Srpaulo				p += 2;
83195618Srpaulo			}
84195618Srpaulo
85178957Ssam		p++;
86178957Ssam		lines++;
87283566Sglebius		chars += (p - path);
88178354Ssam	}
89116742Ssam
90178354Ssam	(void)printf("\nDatabase: %s\n", path_fcodes);
91193655Ssam	(void)printf("Compression: Front: %2.2f%%, ",
92178354Ssam		     (float)(100 * (size + big - (2 * NBG))) / chars);
93178354Ssam	(void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size);
94178354Ssam	(void)printf("Total: %2.2f%%\n",
95178354Ssam		     (float)(100 * (size - (2 * NBG))) / chars);
96178354Ssam	(void)printf("Filenames: %d, ", lines);
97178354Ssam	(void)printf("Characters: %d, ", chars);
98283568Sglebius	(void)printf("Database size: %d\n", size);
99178354Ssam	(void)printf("Bigram characters: %d, ", big);
100178354Ssam	(void)printf("Integers: %d, ", zwerg);
101178354Ssam	(void)printf("8-Bit characters: %d\n", umlaut);
102164645Ssam
103164645Ssam}
104164645Ssam#endif /* _LOCATE_STATISTIC_ */
105164645Ssam
106164645Ssam
107164645Ssamvoid
108165569Ssam#ifdef FF_MMAP
109165569Ssam
110165569Ssam
111165569Ssam#ifdef FF_ICASE
112164645Ssamfastfind_mmap_icase
113164645Ssam#else
114164645Ssamfastfind_mmap
115164645Ssam#endif /* FF_ICASE */
116164645Ssam(pathpart, paddr, len, database)
117164645Ssam	char *pathpart; 	/* search string */
118164645Ssam	caddr_t paddr;  	/* mmap pointer */
119140915Ssam	int len;        	/* length of database */
120165569Ssam	char *database; 	/* for error message */
121165569Ssam
122165569Ssam
123165569Ssam#else /* MMAP */
124287197Sglebius
125165569Ssam
126116742Ssam#ifdef FF_ICASE
127165569Ssamfastfind_icase
128188782Ssam#else
129165574Ssamfastfind
130165569Ssam#endif /* FF_ICASE */
131116742Ssam
132116742Ssam(fp, pathpart, database)
133116742Ssam	FILE *fp;               /* open database */
134186107Ssam	char *pathpart;		/* search string */
135170530Ssam	char *database;		/* for error message */
136116742Ssam
137178354Ssam
138167468Ssam#endif /* MMAP */
139170530Ssam
140116742Ssam{
141170530Ssam	register u_char *p, *s, *patend, *q, *foundchar;
142187796Ssam	register int c, cc;
143187796Ssam	int count, found, globflag;
144187796Ssam	u_char *cutoff;
145187796Ssam	u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
146187796Ssam
147187796Ssam#ifdef FF_ICASE
148187796Ssam	/* use a lookup table for case insensitive search */
149187796Ssam	u_char table[UCHAR_MAX + 1];
150187796Ssam
151187796Ssam	tolower_word(pathpart);
152187796Ssam#endif /* FF_ICASE*/
153187796Ssam
154187796Ssam	/* init bigram table */
155187796Ssam#ifdef FF_MMAP
156187796Ssam	if (len < (2*NBG))
157170530Ssam		errx(1, "database too small: %s", database);
158170530Ssam
159170530Ssam	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
160170530Ssam		p[c] = check_bigram_char(*paddr++);
161170530Ssam		s[c] = check_bigram_char(*paddr++);
162170530Ssam	}
163170530Ssam#else
164170530Ssam	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
165170530Ssam		p[c] = check_bigram_char(getc(fp));
166170530Ssam		s[c] = check_bigram_char(getc(fp));
167170530Ssam	}
168170530Ssam#endif /* FF_MMAP */
169170530Ssam
170170530Ssam	/* find optimal (last) char for searching */
171170530Ssam	for (p = pathpart; *p != '\0'; p++)
172170530Ssam		if (index(LOCATE_REG, *p) != NULL)
173170530Ssam			break;
174170530Ssam
175188782Ssam	if (*p == '\0')
176188782Ssam		globflag = 0;
177188782Ssam	else
178188782Ssam		globflag = 1;
179170530Ssam
180170530Ssam	p = pathpart;
181170530Ssam	patend = patprep(p);
182170530Ssam	cc = *patend;
183116742Ssam
184170530Ssam#ifdef FF_ICASE
185170530Ssam	/* set patend char to true */
186170530Ssam        for (c = 0; c < UCHAR_MAX + 1; c++)
187164645Ssam                table[c] = 0;
188178354Ssam
189178354Ssam	table[TOLOWER(*patend)] = 1;
190178354Ssam	table[toupper(*patend)] = 1;
191178354Ssam#endif /* FF_ICASE */
192170530Ssam
193172233Ssam
194178354Ssam	/* main loop */
195170530Ssam	found = count = 0;
196170530Ssam	foundchar = 0;
197190532Ssam
198170530Ssam#ifdef FF_MMAP
199164645Ssam	c = (u_char)*paddr++; len--;
200165569Ssam	for (; len > 0; ) {
201165569Ssam#else
202165569Ssam	c = getc(fp);
203165569Ssam	for (; c != EOF; ) {
204165569Ssam#endif /* FF_MMAP */
205187897Ssam
206188782Ssam		/* go forward or backward */
207188782Ssam		if (c == SWITCH) { /* big step, an integer */
208188774Ssam#ifdef FF_MMAP
209188774Ssam			count += getwm(paddr) - OFFSET;
210165569Ssam			len -= INTSIZE; paddr += INTSIZE;
211165569Ssam#else
212219596Sbschmidt			count +=  getwf(fp) - OFFSET;
213219596Sbschmidt#endif /* FF_MMAP */
214219596Sbschmidt		} else {	   /* slow step, =< 14 chars */
215219596Sbschmidt			count += c - OFFSET;
216219596Sbschmidt		}
217219596Sbschmidt
218219596Sbschmidt		/* overlay old path */
219219596Sbschmidt		p = path + count;
220219596Sbschmidt		foundchar = p - 1;
221165569Ssam
222165569Ssam		for (;;) {
223165569Ssam#ifdef FF_MMAP
224165569Ssam			c = (u_char)*paddr++;
225165569Ssam		        len--;
226165569Ssam#else
227178354Ssam			c = getc(fp);
228283540Sglebius#endif /* FF_MMAP */
229178354Ssam			/*
230283540Sglebius			 * == UMLAUT: 8 bit char followed
231283540Sglebius			 * <= SWITCH: offset
232178354Ssam			 * >= PARITY: bigram
233178354Ssam			 * rest:      single ascii char
234178354Ssam			 *
235283540Sglebius			 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram
236178354Ssam			 */
237283540Sglebius			if (c < PARITY) {
238283540Sglebius				if (c <= UMLAUT) {
239178354Ssam					if (c == UMLAUT) {
240178354Ssam#ifdef FF_MMAP
241178521Ssam						c = (u_char)*paddr++;
242233452Sadrian						len--;
243233452Sadrian#else
244233452Sadrian						c = getc(fp);
245283529Sglebius#endif /* FF_MMAP */
246233452Sadrian
247233452Sadrian					} else
248283529Sglebius						break; /* SWITCH */
249283529Sglebius				}
250283529Sglebius#ifdef FF_ICASE
251283529Sglebius				if (table[c])
252283529Sglebius#else
253283529Sglebius				if (c == cc)
254283529Sglebius#endif /* FF_ICASE */
255283529Sglebius					foundchar = p;
256283529Sglebius				*p++ = c;
257283529Sglebius			}
258283529Sglebius			else {
259283529Sglebius				/* bigrams are parity-marked */
260283529Sglebius				TO7BIT(c);
261287197Sglebius
262287197Sglebius#ifndef FF_ICASE
263287197Sglebius				if (bigram1[c] == cc ||
264287197Sglebius				    bigram2[c] == cc)
265287197Sglebius#else
266287197Sglebius
267287197Sglebius					if (table[bigram1[c]] ||
268287197Sglebius					    table[bigram2[c]])
269296114Savos#endif /* FF_ICASE */
270287197Sglebius						foundchar = p + 1;
271287197Sglebius
272287197Sglebius				*p++ = bigram1[c];
273296114Savos				*p++ = bigram2[c];
274296114Savos			}
275296114Savos		}
276296114Savos
277296114Savos		if (found) {                     /* previous line matched */
278287197Sglebius			cutoff = path;
279287197Sglebius			*p-- = '\0';
280287197Sglebius			foundchar = p;
281296114Savos		} else if (foundchar >= path + count) { /* a char matched */
282287197Sglebius			*p-- = '\0';
283287197Sglebius			cutoff = path + count;
284287197Sglebius		} else                           /* nothing to do */
285296114Savos			continue;
286296114Savos
287287197Sglebius		found = 0;
288287197Sglebius		for (s = foundchar; s >= cutoff; s--) {
289287197Sglebius			if (*s == cc
290287197Sglebius#ifdef FF_ICASE
291287197Sglebius			    || TOLOWER(*s) == cc
292287197Sglebius#endif /* FF_ICASE */
293287197Sglebius			    ) {	/* fast first char check */
294178354Ssam				for (p = patend - 1, q = s - 1; *p != '\0';
295178354Ssam				     p--, q--)
296178354Ssam					if (*q != *p
297178354Ssam#ifdef FF_ICASE
298165569Ssam					    && TOLOWER(*q) != *p
299287197Sglebius#endif /* FF_ICASE */
300165569Ssam					    )
301165569Ssam						break;
302283529Sglebius				if (*p == '\0') {   /* fast match success */
303283529Sglebius					found = 1;
304178354Ssam					if (!globflag ||
305191746Sthompsa#ifndef FF_ICASE
306191746Sthompsa					    !fnmatch(pathpart, path, 0))
307191746Sthompsa#else
308191746Sthompsa					    !fnmatch(pathpart, path,
309230447Sadrian						     FNM_CASEFOLD))
310283565Sglebius#endif /* !FF_ICASE */
311283568Sglebius					{
312283568Sglebius						if (f_silent)
313165569Ssam							counter++;
314165569Ssam						else if (f_limit) {
315165569Ssam							counter++;
316165569Ssam							if (f_limit >= counter)
317165569Ssam								(void)puts(path);
318287197Sglebius							else
319170530Ssam								errx(0, "[show only %d lines]", counter - 1);
320178354Ssam						} else
321178354Ssam							(void)puts(path);
322233452Sadrian					}
323116742Ssam					break;
324195379Ssam				}
325155688Ssam			}
326155688Ssam		}
327138568Ssam	}
328138568Ssam}
329170530Ssam