look.c revision 11900
1211088Smjacob/*-
2211088Smjacob * Copyright (c) 1991, 1993
3211088Smjacob *	The Regents of the University of California.  All rights reserved.
4211088Smjacob *
5211088Smjacob * This code is derived from software contributed to Berkeley by
6211088Smjacob * David Hitz of Auspex Systems, Inc.
7211088Smjacob *
8211088Smjacob * Redistribution and use in source and binary forms, with or without
9211088Smjacob * modification, are permitted provided that the following conditions
10211088Smjacob * are met:
11211088Smjacob * 1. Redistributions of source code must retain the above copyright
12211088Smjacob *    notice, this list of conditions and the following disclaimer.
13211088Smjacob * 2. Redistributions in binary form must reproduce the above copyright
14211088Smjacob *    notice, this list of conditions and the following disclaimer in the
15211088Smjacob *    documentation and/or other materials provided with the distribution.
16211088Smjacob * 3. All advertising materials mentioning features or use of this software
17211088Smjacob *    must display the following acknowledgement:
18211088Smjacob *	This product includes software developed by the University of
19211088Smjacob *	California, Berkeley and its contributors.
20211088Smjacob * 4. Neither the name of the University nor the names of its contributors
21211088Smjacob *    may be used to endorse or promote products derived from this software
22211088Smjacob *    without specific prior written permission.
23211088Smjacob *
24211088Smjacob * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25211088Smjacob * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26211088Smjacob * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27211088Smjacob * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28211088Smjacob * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29211088Smjacob * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30211088Smjacob * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31211088Smjacob * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32211088Smjacob * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33211088Smjacob * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34211183Smjacob * SUCH DAMAGE.
35211183Smjacob */
36211183Smjacob
37211183Smjacob#ifndef lint
38211183Smjacobstatic char copyright[] =
39211183Smjacob"@(#) Copyright (c) 1991, 1993\n\
40211183Smjacob	The Regents of the University of California.  All rights reserved.\n";
41211183Smjacob#endif /* not lint */
42211183Smjacob
43211183Smjacob#ifndef lint
44211183Smjacobstatic char sccsid[] = "@(#)look.c	8.1 (Berkeley) 6/14/93";
45211088Smjacob#endif /* not lint */
46211088Smjacob
47211088Smjacob/*
48211088Smjacob * look -- find lines in a sorted list.
49211088Smjacob *
50211088Smjacob * The man page said that TABs and SPACEs participate in -d comparisons.
51211088Smjacob * In fact, they were ignored.  This implements historic practice, not
52211088Smjacob * the manual page.
53211088Smjacob */
54211088Smjacob
55211088Smjacob#include <sys/types.h>
56211088Smjacob#include <sys/mman.h>
57211088Smjacob#include <sys/stat.h>
58211088Smjacob
59211088Smjacob#include <limits.h>
60211088Smjacob#include <locale.h>
61211088Smjacob#include <errno.h>
62211088Smjacob#include <fcntl.h>
63211183Smjacob#include <stdio.h>
64211183Smjacob#include <stdlib.h>
65211088Smjacob#include <string.h>
66211088Smjacob#include <ctype.h>
67211183Smjacob#include "pathnames.h"
68211088Smjacob
69211088Smjacob/*
70211088Smjacob * FOLD and DICT convert characters to a normal form for comparison,
71211088Smjacob * according to the user specified flags.
72211088Smjacob *
73211088Smjacob * DICT expects integers because it uses a non-character value to
74211088Smjacob * indicate a character which should not participate in comparisons.
75211088Smjacob */
76211183Smjacob#define	EQUAL		0
77211088Smjacob#define	GREATER		1
78211088Smjacob#define	LESS		(-1)
79211088Smjacob#define NO_COMPARE	(-2)
80211088Smjacob
81211088Smjacob#define FOLD(c) (isupper(c) ? tolower(c) : (unsigned char) (c))
82211088Smjacob#define DICT(c) (isalnum(c) ? (c) & 0xFF /* int */ : NO_COMPARE)
83211183Smjacob
84211088Smjacobint dflag, fflag;
85211183Smjacob
86211183Smjacobchar    *binary_search __P((unsigned char *, unsigned char *, unsigned char *));
87211088Smjacobint      compare __P((unsigned char *, unsigned char *, unsigned char *));
88211088Smjacobvoid	 err __P((const char *fmt, ...));
89211088Smjacobchar    *linear_search __P((unsigned char *, unsigned char *, unsigned char *));
90211088Smjacobint      look __P((unsigned char *, unsigned char *, unsigned char *));
91211088Smjacobvoid     print_from __P((unsigned char *, unsigned char *, unsigned char *));
92211088Smjacob
93211088Smjacobstatic void usage __P((void));
94211088Smjacob
95211088Smjacobmain(argc, argv)
96211088Smjacob	int argc;
97211088Smjacob	char *argv[];
98211088Smjacob{
99211088Smjacob	struct stat sb;
100211088Smjacob	int ch, fd, termchar;
101211088Smjacob	unsigned char *back, *file, *front, *string, *p;
102211088Smjacob
103211088Smjacob	(void) setlocale(LC_CTYPE, "");
104211088Smjacob
105211088Smjacob	file = _PATH_WORDS;
106211088Smjacob	termchar = '\0';
107211088Smjacob	while ((ch = getopt(argc, argv, "dft:")) != EOF)
108211088Smjacob		switch(ch) {
109211088Smjacob		case 'd':
110211183Smjacob			dflag = 1;
111211088Smjacob			break;
112211088Smjacob		case 'f':
113211088Smjacob			fflag = 1;
114211088Smjacob			break;
115211088Smjacob		case 't':
116211183Smjacob			termchar = *optarg;
117211183Smjacob			break;
118211183Smjacob		case '?':
119211088Smjacob		default:
120211183Smjacob			usage();
121211183Smjacob		}
122211183Smjacob	argc -= optind;
123211183Smjacob	argv += optind;
124211183Smjacob
125211183Smjacob	switch (argc) {
126211183Smjacob	case 2:				/* Don't set -df for user. */
127211088Smjacob		string = *argv++;
128211088Smjacob		file = *argv;
129211088Smjacob		break;
130211088Smjacob	case 1:				/* But set -df by default. */
131211088Smjacob		dflag = fflag = 1;
132211088Smjacob		string = *argv;
133211088Smjacob		break;
134211088Smjacob	default:
135211088Smjacob		usage();
136211088Smjacob	}
137211183Smjacob
138211088Smjacob	if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
139211088Smjacob		*++p = '\0';
140211183Smjacob
141211088Smjacob	if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb))
142211088Smjacob		err("%s: %s", file, strerror(errno));
143211088Smjacob	if (sb.st_size > SIZE_T_MAX)
144211088Smjacob		err("%s: %s", file, strerror(EFBIG));
145211088Smjacob	if ((front = mmap(NULL,
146211088Smjacob	    (size_t)sb.st_size, PROT_READ, 0, fd, (off_t)0)) == NULL)
147211088Smjacob		err("%s: %s", file, strerror(errno));
148211088Smjacob	back = front + sb.st_size;
149211088Smjacob	exit(look(string, front, back));
150211088Smjacob}
151211183Smjacob
152211183Smjacoblook(string, front, back)
153211183Smjacob	unsigned char *string, *front, *back;
154211183Smjacob{
155211183Smjacob	register int ch;
156211088Smjacob	register unsigned char *readp, *writep;
157211088Smjacob
158211088Smjacob	/* Reformat string string to avoid doing it multiple times later. */
159211088Smjacob	for (readp = writep = string; ch = *readp++;) {
160211088Smjacob		if (fflag)
161211088Smjacob			ch = FOLD(ch);
162211088Smjacob		if (dflag)
163211088Smjacob			ch = DICT(ch);
164211088Smjacob		if (ch != NO_COMPARE)
165211088Smjacob			*(writep++) = ch;
166211088Smjacob	}
167211088Smjacob	*writep = '\0';
168211088Smjacob
169211088Smjacob	front = binary_search(string, front, back);
170211088Smjacob	front = linear_search(string, front, back);
171211088Smjacob
172211088Smjacob	if (front)
173211088Smjacob		print_from(string, front, back);
174211088Smjacob	return (front ? 0 : 1);
175211088Smjacob}
176211088Smjacob
177211088Smjacob
178211088Smjacob/*
179211088Smjacob * Binary search for "string" in memory between "front" and "back".
180211088Smjacob *
181211088Smjacob * This routine is expected to return a pointer to the start of a line at
182211088Smjacob * *or before* the first word matching "string".  Relaxing the constraint
183211088Smjacob * this way simplifies the algorithm.
184211088Smjacob *
185211088Smjacob * Invariants:
186211088Smjacob * 	front points to the beginning of a line at or before the first
187211088Smjacob *	matching string.
188211088Smjacob *
189211088Smjacob * 	back points to the beginning of a line at or after the first
190211088Smjacob *	matching line.
191211088Smjacob *
192211088Smjacob * Base of the Invariants.
193211088Smjacob * 	front = NULL;
194211088Smjacob *	back = EOF;
195211088Smjacob *
196211088Smjacob * Advancing the Invariants:
197211088Smjacob *
198211088Smjacob * 	p = first newline after halfway point from front to back.
199211088Smjacob *
200211088Smjacob * 	If the string at "p" is not greater than the string to match,
201211088Smjacob *	p is the new front.  Otherwise it is the new back.
202211088Smjacob *
203211088Smjacob * Termination:
204211088Smjacob *
205211088Smjacob * 	The definition of the routine allows it return at any point,
206211088Smjacob *	since front is always at or before the line to print.
207211088Smjacob *
208211088Smjacob * 	In fact, it returns when the chosen "p" equals "back".  This
209211088Smjacob *	implies that there exists a string is least half as long as
210211088Smjacob *	(back - front), which in turn implies that a linear search will
211211088Smjacob *	be no more expensive than the cost of simply printing a string or two.
212211088Smjacob *
213211088Smjacob * 	Trying to continue with binary search at this point would be
214211088Smjacob *	more trouble than it's worth.
215211088Smjacob */
216211088Smjacob#define	SKIP_PAST_NEWLINE(p, back) \
217211088Smjacob	while (p < back && *p++ != '\n');
218211088Smjacob
219211088Smjacobchar *
220211088Smjacobbinary_search(string, front, back)
221211088Smjacob	register unsigned char *string, *front, *back;
222211088Smjacob{
223211088Smjacob	register unsigned char *p;
224211088Smjacob
225211088Smjacob	p = front + (back - front) / 2;
226211088Smjacob	SKIP_PAST_NEWLINE(p, back);
227211088Smjacob
228211088Smjacob	/*
229211088Smjacob	 * If the file changes underneath us, make sure we don't
230211088Smjacob	 * infinitely loop.
231211088Smjacob	 */
232211088Smjacob	while (p < back && back > front) {
233211088Smjacob		if (compare(string, p, back) == GREATER)
234211088Smjacob			front = p;
235211088Smjacob		else
236211088Smjacob			back = p;
237211088Smjacob		p = front + (back - front) / 2;
238211088Smjacob		SKIP_PAST_NEWLINE(p, back);
239211088Smjacob	}
240211088Smjacob	return (front);
241211088Smjacob}
242211088Smjacob
243211088Smjacob/*
244211088Smjacob * Find the first line that starts with string, linearly searching from front
245211088Smjacob * to back.
246211088Smjacob *
247211088Smjacob * Return NULL for no such line.
248211088Smjacob *
249211088Smjacob * This routine assumes:
250211088Smjacob *
251211088Smjacob * 	o front points at the first character in a line.
252211088Smjacob *	o front is before or at the first line to be printed.
253211088Smjacob */
254211088Smjacobchar *
255211088Smjacoblinear_search(string, front, back)
256211088Smjacob	unsigned char *string, *front, *back;
257211088Smjacob{
258211088Smjacob	while (front < back) {
259211088Smjacob		switch (compare(string, front, back)) {
260211088Smjacob		case EQUAL:		/* Found it. */
261211088Smjacob			return (front);
262211088Smjacob			break;
263211088Smjacob		case LESS:		/* No such string. */
264211088Smjacob			return (NULL);
265211088Smjacob			break;
266211088Smjacob		case GREATER:		/* Keep going. */
267211088Smjacob			break;
268211088Smjacob		}
269211088Smjacob		SKIP_PAST_NEWLINE(front, back);
270211088Smjacob	}
271211088Smjacob	return (NULL);
272211088Smjacob}
273211088Smjacob
274211088Smjacob/*
275211088Smjacob * Print as many lines as match string, starting at front.
276211088Smjacob */
277211088Smjacobvoid
278211088Smjacobprint_from(string, front, back)
279211088Smjacob	register unsigned char *string, *front, *back;
280211088Smjacob{
281211088Smjacob	for (; front < back && compare(string, front, back) == EQUAL; ++front) {
282211088Smjacob		for (; front < back && *front != '\n'; ++front)
283211088Smjacob			if (putchar(*front) == EOF)
284211088Smjacob				err("stdout: %s", strerror(errno));
285211088Smjacob		if (putchar('\n') == EOF)
286211088Smjacob			err("stdout: %s", strerror(errno));
287211088Smjacob	}
288211088Smjacob}
289211088Smjacob
290211088Smjacob/*
291211088Smjacob * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
292211088Smjacob * string2 (s1 ??? s2).
293211088Smjacob *
294211088Smjacob * 	o Matches up to len(s1) are EQUAL.
295211088Smjacob *	o Matches up to len(s2) are GREATER.
296211088Smjacob *
297211088Smjacob * Compare understands about the -f and -d flags, and treats comparisons
298211088Smjacob * appropriately.
299211088Smjacob *
300211088Smjacob * The string "s1" is null terminated.  The string s2 is '\n' terminated (or
301211088Smjacob * "back" terminated).
302211088Smjacob */
303211088Smjacobint
304211088Smjacobcompare(s1, s2, back)
305211088Smjacob	register unsigned char *s1, *s2, *back;
306211088Smjacob{
307211088Smjacob	register int ch;
308211088Smjacob
309211088Smjacob	for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
310211088Smjacob		ch = *s2;
311211088Smjacob		if (fflag)
312211088Smjacob			ch = FOLD(ch);
313211088Smjacob		if (dflag)
314211088Smjacob			ch = DICT(ch);
315211088Smjacob
316211088Smjacob		if (ch == NO_COMPARE) {
317211088Smjacob			++s2;		/* Ignore character in comparison. */
318211183Smjacob			continue;
319211183Smjacob		}
320211183Smjacob		if (*s1 != ch)
321211183Smjacob			return (*s1 < ch ? LESS : GREATER);
322211183Smjacob	}
323211088Smjacob	return (*s1 ? GREATER : EQUAL);
324211088Smjacob}
325211088Smjacob
326211088Smjacobstatic void
327211088Smjacobusage()
328211088Smjacob{
329211088Smjacob	(void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n");
330211088Smjacob	exit(2);
331211088Smjacob}
332211088Smjacob
333211088Smjacob#if __STDC__
334211088Smjacob#include <stdarg.h>
335211088Smjacob#else
336211088Smjacob#include <varargs.h>
337211088Smjacob#endif
338211088Smjacob
339211088Smjacobvoid
340211088Smjacob#if __STDC__
341211088Smjacoberr(const char *fmt, ...)
342211088Smjacob#else
343211088Smjacoberr(fmt, va_alist)
344211088Smjacob	char *fmt;
345211088Smjacob	va_dcl
346211088Smjacob#endif
347211088Smjacob{
348211088Smjacob	va_list ap;
349211088Smjacob#if __STDC__
350211088Smjacob	va_start(ap, fmt);
351211088Smjacob#else
352211088Smjacob	va_start(ap);
353211088Smjacob#endif
354211088Smjacob	(void)fprintf(stderr, "look: ");
355211088Smjacob	(void)vfprintf(stderr, fmt, ap);
356211088Smjacob	va_end(ap);
357211088Smjacob	(void)fprintf(stderr, "\n");
358211088Smjacob	exit(2);
359211088Smjacob	/* NOTREACHED */
360211088Smjacob}
361211088Smjacob