util.c revision 18829
1/*
2 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
3 * Copyright (c) 1989, 1993
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * James A. Woods.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * $Id: util.c,v 1.1 1996/08/31 23:14:54 wosch Exp $
38 */
39
40
41#include <stdlib.h>
42#include <string.h>
43#include <err.h>
44#include <sys/param.h>
45#include <stdio.h>
46
47#include "locate.h"
48
49char 	**colon __P((char **, char*, char*));
50char 	*patprep __P((char *));
51void print_matches __P((u_int));
52u_char 	*tolower_word __P((u_char *));
53int 	getwm __P((caddr_t));
54int 	getwf __P((FILE *));
55int	check_bigram_char __P((int));
56
57/*
58 * Validate bigram chars. If the test failed the database is corrupt
59 * or the database is obviously not a locate database.
60 */
61int
62check_bigram_char(ch)
63	int ch;
64{
65	/* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
66	if (ch == 0 ||
67	    (ch >= ASCII_MIN && ch <= ASCII_MAX))
68		return(ch);
69
70	(void)fprintf(stderr, "locate database header corrupt, bigram ");
71	(void)fprintf(stderr, "char outside 0, %d-%d: %d\n",
72                      ASCII_MIN, ASCII_MAX, ch);
73	exit(1);
74}
75
/*
 * Split a colon separated string into a NULL-terminated vector of
 * strings. New elements are appended to dbv in the order in which they
 * appear in path.
 *
 * "bla:foo" -> {"bla", "foo"}
 * "bla:"    -> {"bla", dot}
 * ":"       -> {dot}
 * "bla"     -> {"bla"}
 * ""        -> warning on stderr, nothing appended
 *
 * dbv may be NULL, in which case a fresh, empty vector is allocated
 * first. Non-empty components are copied into newly allocated storage;
 * an empty component stores the dot pointer itself, not a copy.
 * Allocation failure is fatal (err(3), exit status 1).
 *
 * Returns the (possibly moved) vector; callers must use the return
 * value in place of the dbv they passed in.
 */
char **
colon(dbv, path, dot)
	char **dbv;
	char *path;
	char *dot; /* default for single ':' */
{
	size_t vlen, slen;
	char *c, *ch, *p;
	char **nv;

	if (dbv == NULL) {
		if ((dbv = malloc(sizeof(*dbv))) == NULL)
			err(1, "malloc");
		*dbv = NULL;
	}

	/* empty string */
	if (*path == '\0') {
		(void)fprintf(stderr, "empty database name, ignored\n");
		return (dbv);
	}

	/* number of elements already stored in the vector */
	for (vlen = 0; dbv[vlen] != NULL; vlen++)
		continue;

	/* c marks the start of the current component, ch scans ahead */
	for (ch = c = path; ; ch++) {
		/*
		 * A component ends at every ':' and at the end of the
		 * string. The one exception is the terminator of the
		 * string ":", which must not add a second dot. path is
		 * not empty here, so ch[-1] is valid whenever *ch is the
		 * terminator.
		 */
		if (*ch == ':' ||
		    (*ch == '\0' && !(ch[-1] == ':' && ch == path + 1))) {
			if (ch == c) {
				/* empty component -> dot */
				p = dot;
			} else {
				/* a string: copy it out of path */
				slen = (size_t)(ch - c);
				if ((p = malloc(slen + 1)) == NULL)
					err(1, "malloc");
				memcpy(p, c, slen);
				p[slen] = '\0';
			}

			/* grow dbv by one element, keep the NULL terminator */
			nv = realloc(dbv, sizeof(*dbv) * (vlen + 2));
			if (nv == NULL)
				err(1, "realloc");
			dbv = nv;
			dbv[vlen++] = p;
			dbv[vlen] = NULL;

			c = ch + 1;
		}
		if (*ch == '\0')
			break;
	}
	return (dbv);
}
137
138void
139print_matches(counter)
140	u_int counter;
141{
142	(void)printf("%d\n", counter);
143}
144
145
146/*
147 * extract last glob-free subpattern in name for fast pre-match; prepend
148 * '\0' for backwards match; return end of new pattern
149 */
150static char globfree[100];
151
152char *
153patprep(name)
154	char *name;
155{
156	register char *endmark, *p, *subp;
157
158	subp = globfree;
159	*subp++ = '\0';   /* set first element to '\0' */
160	p = name + strlen(name) - 1;
161
162	/* skip trailing metacharacters */
163	for (; p >= name; p--)
164		if (index(LOCATE_REG, *p) == NULL)
165			break;
166
167	/*
168	 * check if maybe we are in a character class
169	 *
170	 * 'foo.[ch]'
171	 *        |----< p
172	 */
173	if (p >= name &&
174	    (index(p, '[') != NULL || index(p, ']') != NULL)) {
175		for (p = name; *p != '\0'; p++)
176			if (*p == ']' || *p == '[')
177				break;
178		p--;
179
180		/*
181		 * cannot find a non-meta character, give up
182		 * '*\*[a-z]'
183		 *    |-------< p
184		 */
185		if (p >= name && index(LOCATE_REG, *p) != NULL)
186			p = name - 1;
187	}
188
189	if (p < name)
190		/* only meta chars: "???", force '/' search */
191		*subp++ = '/';
192
193	else {
194		for (endmark = p; p >= name; p--)
195			if (index(LOCATE_REG, *p) != NULL)
196				break;
197		for (++p;
198		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
199			*subp++ = *p++;
200	}
201	*subp = '\0';
202	return(--subp);
203}
204
205/* tolower word */
206u_char *
207tolower_word(word)
208	u_char *word;
209{
210	register u_char *p;
211
212	for(p = word; *p != '\0'; p++)
213		*p = TOLOWER(*p);
214
215	return(word);
216}
217
218
219/*
220 * Read integer from mmap pointer.
221 * Essential a simple  ``return *(int *)p'' but avoid sigbus
222 * for integer alignment (SunOS 4.x, 5.x).
223 *
224 * Convert network byte order to host byte order if neccessary.
225 * So we can read on FreeBSD/i386 (little endian) a locate database
226 * which was built on SunOS/sparc (big endian).
227 */
228
229int
230getwm(p)
231	caddr_t p;
232{
233	static char buf[INTSIZE];
234	register int i;
235
236	for (i = 0; i < INTSIZE; i++)
237		buf[i] = *p++;
238
239	i = *(int *)buf;
240
241	if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
242		i = ntohl(i);
243		if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
244			(void)fprintf(stderr,
245				"integer out of +-MAXPATHLEN (%d): %d\n",
246			    	MAXPATHLEN, i);
247			exit(1);
248		}
249	}
250	return(i);
251}
252
/*
 * Read one integer from the stream fp with getw(3).
 *
 * A value within +-MAXPATHLEN is returned as is. Anything else is
 * passed through ntohl() and range-checked again, so that a database
 * built on a big endian machine (SunOS/sparc) can be read on a little
 * endian one (FreeBSD/i386). If the value is still out of range a
 * diagnostic is written to stderr and the program exits with status 1.
 */

int
getwf(fp)
	FILE *fp;
{
	int value;

	value = getw(fp);
	if (value >= -(MAXPATHLEN) && value <= MAXPATHLEN)
		return (value);

	/* implausible as a host-order value, try the other byte order */
	value = ntohl(value);
	if (value >= -(MAXPATHLEN) && value <= MAXPATHLEN)
		return (value);

	(void)fprintf(stderr,
	    "integer out of +-MAXPATHLEN (%d): %d\n",
	    MAXPATHLEN, value);
	exit(1);
}
280