look.c revision 1590
1139749Simp/*-
271316Simp * Copyright (c) 1991, 1993
371316Simp *	The Regents of the University of California.  All rights reserved.
471316Simp *
571316Simp * This code is derived from software contributed to Berkeley by
671316Simp * David Hitz of Auspex Systems, Inc.
771316Simp *
871316Simp * Redistribution and use in source and binary forms, with or without
971316Simp * modification, are permitted provided that the following conditions
1071316Simp * are met:
1171316Simp * 1. Redistributions of source code must retain the above copyright
1271316Simp *    notice, this list of conditions and the following disclaimer.
1371316Simp * 2. Redistributions in binary form must reproduce the above copyright
1471316Simp *    notice, this list of conditions and the following disclaimer in the
1571316Simp *    documentation and/or other materials provided with the distribution.
1671316Simp * 3. All advertising materials mentioning features or use of this software
1771316Simp *    must display the following acknowledgement:
1871316Simp *	This product includes software developed by the University of
1971316Simp *	California, Berkeley and its contributors.
2071316Simp * 4. Neither the name of the University nor the names of its contributors
2171316Simp *    may be used to endorse or promote products derived from this software
2271316Simp *    without specific prior written permission.
2371316Simp *
2471316Simp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2571316Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2671316Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2771316Simp * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2871316Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2971316Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3071316Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3171316Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3271316Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3371316Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3471316Simp * SUCH DAMAGE.
3571316Simp */
3671316Simp
3771316Simp#ifndef lint
38140927Simpstatic char copyright[] =
39147256Sbrooks"@(#) Copyright (c) 1991, 1993\n\
4071316Simp	The Regents of the University of California.  All rights reserved.\n";
41140927Simp#endif /* not lint */
42140927Simp
4371316Simp#ifndef lint
44140927Simpstatic char sccsid[] = "@(#)look.c	8.1 (Berkeley) 6/14/93";
45140927Simp#endif /* not lint */
4671316Simp
47155526Simp/*
48147256Sbrooks * look -- find lines in a sorted list.
49140927Simp *
5071316Simp * The man page said that TABs and SPACEs participate in -d comparisons.
5171316Simp * In fact, they were ignored.  This implements historic practice, not
5271316Simp * the manual page.
5371316Simp */
5471316Simp
55140927Simp#include <sys/types.h>
5671316Simp#include <sys/mman.h>
57140927Simp#include <sys/stat.h>
58140927Simp
59140927Simp#include <limits.h>
6071316Simp#include <errno.h>
61140926Simp#include <fcntl.h>
62140926Simp#include <stdio.h>
63140927Simp#include <stdlib.h>
6471316Simp#include <string.h>
65140927Simp#include <ctype.h>
66140927Simp#include "pathnames.h"
67140927Simp
6871316Simp/*
69140927Simp * FOLD and DICT convert characters to a normal form for comparison,
70140927Simp * according to the user specified flags.
7171316Simp *
7271316Simp * DICT expects integers because it uses a non-character value to
7371316Simp * indicate a character which should not participate in comparisons.
7471316Simp */
7571316Simp#define	EQUAL		0
76121816Sbrooks#define	GREATER		1
7771316Simp#define	LESS		(-1)
78140888Simp#define NO_COMPARE	(-2)
7971316Simp
8071316Simp#define	FOLD(c)	(isascii(c) && isupper(c) ? tolower(c) : (c))
8171316Simp#define	DICT(c)	(isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
8271316Simp
83int dflag, fflag;
84
85char	*binary_search __P((char *, char *, char *));
86int	 compare __P((char *, char *, char *));
87void	 err __P((const char *fmt, ...));
88char	*linear_search __P((char *, char *, char *));
89int	 look __P((char *, char *, char *));
90void	 print_from __P((char *, char *, char *));
91
92static void usage __P((void));
93
94main(argc, argv)
95	int argc;
96	char *argv[];
97{
98	struct stat sb;
99	int ch, fd, termchar;
100	char *back, *file, *front, *string, *p;
101
102	file = _PATH_WORDS;
103	termchar = '\0';
104	while ((ch = getopt(argc, argv, "dft:")) != EOF)
105		switch(ch) {
106		case 'd':
107			dflag = 1;
108			break;
109		case 'f':
110			fflag = 1;
111			break;
112		case 't':
113			termchar = *optarg;
114			break;
115		case '?':
116		default:
117			usage();
118		}
119	argc -= optind;
120	argv += optind;
121
122	switch (argc) {
123	case 2:				/* Don't set -df for user. */
124		string = *argv++;
125		file = *argv;
126		break;
127	case 1:				/* But set -df by default. */
128		dflag = fflag = 1;
129		string = *argv;
130		break;
131	default:
132		usage();
133	}
134
135	if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
136		*++p = '\0';
137
138	if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
139		err("%s: %s", file, strerror(errno));
140	if (sb.st_size > SIZE_T_MAX)
141		err("%s: %s", file, strerror(EFBIG));
142	if ((front = mmap(NULL,
143	    (size_t)sb.st_size, PROT_READ, 0, fd, (off_t)0)) == NULL)
144		err("%s: %s", file, strerror(errno));
145	back = front + sb.st_size;
146	exit(look(string, front, back));
147}
148
149look(string, front, back)
150	char *string, *front, *back;
151{
152	register int ch;
153	register char *readp, *writep;
154
155	/* Reformat string string to avoid doing it multiple times later. */
156	for (readp = writep = string; ch = *readp++;) {
157		if (fflag)
158			ch = FOLD(ch);
159		if (dflag)
160			ch = DICT(ch);
161		if (ch != NO_COMPARE)
162			*(writep++) = ch;
163	}
164	*writep = '\0';
165
166	front = binary_search(string, front, back);
167	front = linear_search(string, front, back);
168
169	if (front)
170		print_from(string, front, back);
171	return (front ? 0 : 1);
172}
173
174
175/*
176 * Binary search for "string" in memory between "front" and "back".
177 *
178 * This routine is expected to return a pointer to the start of a line at
179 * *or before* the first word matching "string".  Relaxing the constraint
180 * this way simplifies the algorithm.
181 *
182 * Invariants:
183 * 	front points to the beginning of a line at or before the first
184 *	matching string.
185 *
186 * 	back points to the beginning of a line at or after the first
187 *	matching line.
188 *
189 * Base of the Invariants.
190 * 	front = NULL;
191 *	back = EOF;
192 *
193 * Advancing the Invariants:
194 *
195 * 	p = first newline after halfway point from front to back.
196 *
197 * 	If the string at "p" is not greater than the string to match,
198 *	p is the new front.  Otherwise it is the new back.
199 *
200 * Termination:
201 *
202 * 	The definition of the routine allows it return at any point,
203 *	since front is always at or before the line to print.
204 *
205 * 	In fact, it returns when the chosen "p" equals "back".  This
206 *	implies that there exists a string is least half as long as
207 *	(back - front), which in turn implies that a linear search will
208 *	be no more expensive than the cost of simply printing a string or two.
209 *
210 * 	Trying to continue with binary search at this point would be
211 *	more trouble than it's worth.
212 */
213#define	SKIP_PAST_NEWLINE(p, back) \
214	while (p < back && *p++ != '\n');
215
216char *
217binary_search(string, front, back)
218	register char *string, *front, *back;
219{
220	register char *p;
221
222	p = front + (back - front) / 2;
223	SKIP_PAST_NEWLINE(p, back);
224
225	/*
226	 * If the file changes underneath us, make sure we don't
227	 * infinitely loop.
228	 */
229	while (p < back && back > front) {
230		if (compare(string, p, back) == GREATER)
231			front = p;
232		else
233			back = p;
234		p = front + (back - front) / 2;
235		SKIP_PAST_NEWLINE(p, back);
236	}
237	return (front);
238}
239
240/*
241 * Find the first line that starts with string, linearly searching from front
242 * to back.
243 *
244 * Return NULL for no such line.
245 *
246 * This routine assumes:
247 *
248 * 	o front points at the first character in a line.
249 *	o front is before or at the first line to be printed.
250 */
251char *
252linear_search(string, front, back)
253	char *string, *front, *back;
254{
255	while (front < back) {
256		switch (compare(string, front, back)) {
257		case EQUAL:		/* Found it. */
258			return (front);
259			break;
260		case LESS:		/* No such string. */
261			return (NULL);
262			break;
263		case GREATER:		/* Keep going. */
264			break;
265		}
266		SKIP_PAST_NEWLINE(front, back);
267	}
268	return (NULL);
269}
270
271/*
272 * Print as many lines as match string, starting at front.
273 */
274void
275print_from(string, front, back)
276	register char *string, *front, *back;
277{
278	for (; front < back && compare(string, front, back) == EQUAL; ++front) {
279		for (; front < back && *front != '\n'; ++front)
280			if (putchar(*front) == EOF)
281				err("stdout: %s", strerror(errno));
282		if (putchar('\n') == EOF)
283			err("stdout: %s", strerror(errno));
284	}
285}
286
287/*
288 * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
289 * string2 (s1 ??? s2).
290 *
291 * 	o Matches up to len(s1) are EQUAL.
292 *	o Matches up to len(s2) are GREATER.
293 *
294 * Compare understands about the -f and -d flags, and treats comparisons
295 * appropriately.
296 *
297 * The string "s1" is null terminated.  The string s2 is '\n' terminated (or
298 * "back" terminated).
299 */
300int
301compare(s1, s2, back)
302	register char *s1, *s2, *back;
303{
304	register int ch;
305
306	for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
307		ch = *s2;
308		if (fflag)
309			ch = FOLD(ch);
310		if (dflag)
311			ch = DICT(ch);
312
313		if (ch == NO_COMPARE) {
314			++s2;		/* Ignore character in comparison. */
315			continue;
316		}
317		if (*s1 != ch)
318			return (*s1 < ch ? LESS : GREATER);
319	}
320	return (*s1 ? GREATER : EQUAL);
321}
322
323static void
324usage()
325{
326	(void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n");
327	exit(2);
328}
329
330#if __STDC__
331#include <stdarg.h>
332#else
333#include <varargs.h>
334#endif
335
336void
337#if __STDC__
338err(const char *fmt, ...)
339#else
340err(fmt, va_alist)
341	char *fmt;
342	va_dcl
343#endif
344{
345	va_list ap;
346#if __STDC__
347	va_start(ap, fmt);
348#else
349	va_start(ap);
350#endif
351	(void)fprintf(stderr, "look: ");
352	(void)vfprintf(stderr, fmt, ap);
353	va_end(ap);
354	(void)fprintf(stderr, "\n");
355	exit(2);
356	/* NOTREACHED */
357}
358