1/*	$OpenBSD: look.c,v 1.27 2022/12/04 23:50:48 cheloha Exp $	*/
2/*	$NetBSD: look.c,v 1.7 1995/08/31 22:41:02 jtc Exp $	*/
3
4/*-
5 * Copyright (c) 1991, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * David Hitz of Auspex Systems, Inc.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36/*
37 * look -- find lines in a sorted list.
38 *
39 * The man page said that TABs and SPACEs participate in -d comparisons.
40 * In fact, they were ignored.  This implements historic practice, not
41 * the manual page.
42 */
43
44#include <sys/types.h>
45#include <sys/mman.h>
46#include <sys/stat.h>
47
48#include <ctype.h>
49#include <errno.h>
50#include <fcntl.h>
51#include <stdint.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55#include <unistd.h>
56#include <err.h>
57
58#include "pathnames.h"
59
60#define	EQUAL		0
61#define	GREATER		1
62#define	LESS		(-1)
63
64int dflag, fflag;
65
66char	*binary_search(char *, char *, char *);
67int	 compare(char *, char *, char *);
68char	*linear_search(char *, char *, char *);
69int	 look(char *, char *, char *);
70void	 print_from(char *, char *, char *);
71void	 usage(void);
72
73int
74main(int argc, char *argv[])
75{
76	struct stat sb;
77	int ch, fd, termchar;
78	char *back, *file, *front, *string, *p;
79
80	if (pledge("stdio rpath", NULL) == -1)
81		err(2, "pledge");
82
83	file = _PATH_WORDS;
84	termchar = '\0';
85	while ((ch = getopt(argc, argv, "dft:")) != -1)
86		switch(ch) {
87		case 'd':
88			dflag = 1;
89			break;
90		case 'f':
91			fflag = 1;
92			break;
93		case 't':
94			termchar = *optarg;
95			break;
96		default:
97			usage();
98		}
99	argc -= optind;
100	argv += optind;
101
102	switch (argc) {
103	case 2:				/* Don't set -df for user. */
104		string = *argv++;
105		file = *argv;
106		break;
107	case 1:				/* But set -df by default. */
108		dflag = fflag = 1;
109		string = *argv;
110		break;
111	default:
112		usage();
113	}
114
115	if (termchar != '\0' && (p = strchr(string, termchar)) != NULL)
116		*++p = '\0';
117
118	if ((fd = open(file, O_RDONLY)) == -1 || fstat(fd, &sb) == -1)
119		err(2, "%s", file);
120	if (sb.st_size > SIZE_MAX)
121		errc(2, EFBIG, "%s", file);
122
123	if (pledge("stdio", NULL) == -1)
124		err(2, "pledge");
125
126	if ((front = mmap(NULL,
127	    (size_t)sb.st_size, PROT_READ, MAP_PRIVATE, fd, (off_t)0)) == MAP_FAILED)
128		err(2, "%s", file);
129	back = front + sb.st_size;
130	exit(look(string, front, back));
131}
132
133int
134look(char *string, char *front, char *back)
135{
136	int ch;
137	char *readp, *writep;
138
139	/* Reformat string to avoid doing it multiple times later. */
140	for (readp = writep = string; (ch = *readp++);) {
141		if (fflag)
142			ch = tolower((unsigned char)ch);
143		if (!dflag || isalnum((unsigned char)ch))
144			*(writep++) = ch;
145	}
146	*writep = '\0';
147
148	front = binary_search(string, front, back);
149	front = linear_search(string, front, back);
150
151	if (front)
152		print_from(string, front, back);
153	return (front ? 0 : 1);
154}
155
156
157/*
158 * Binary search for "string" in memory between "front" and "back".
159 *
160 * This routine is expected to return a pointer to the start of a line at
161 * *or before* the first word matching "string".  Relaxing the constraint
162 * this way simplifies the algorithm.
163 *
164 * Invariants:
165 *	front points to the beginning of a line at or before the first
166 *	matching string.
167 *
168 *	back points to the beginning of a line at or after the first
169 *	matching line.
170 *
171 * Base of the Invariants.
172 *	front = NULL;
173 *	back = EOF;
174 *
175 * Advancing the Invariants:
176 *
177 *	p = first newline after halfway point from front to back.
178 *
179 *	If the string at "p" is not greater than the string to match,
180 *	p is the new front.  Otherwise it is the new back.
181 *
182 * Termination:
183 *
184 *	The definition of the routine allows it return at any point,
185 *	since front is always at or before the line to print.
186 *
187 *	In fact, it returns when the chosen "p" equals "back".  This
188 *	implies that there exists a string is least half as long as
189 *	(back - front), which in turn implies that a linear search will
190 *	be no more expensive than the cost of simply printing a string or two.
191 *
192 *	Trying to continue with binary search at this point would be
193 *	more trouble than it's worth.
194 */
195#define	SKIP_PAST_NEWLINE(p, back) \
196	while (p < back && *p++ != '\n');
197
198char *
199binary_search(char *string, char *front, char *back)
200{
201	char *p;
202
203	p = front + (back - front) / 2;
204	SKIP_PAST_NEWLINE(p, back);
205
206	/*
207	 * If the file changes underneath us, make sure we don't
208	 * infinitely loop.
209	 */
210	while (p < back && back > front) {
211		if (compare(string, p, back) == GREATER)
212			front = p;
213		else
214			back = p;
215		p = front + (back - front) / 2;
216		SKIP_PAST_NEWLINE(p, back);
217	}
218	return (front);
219}
220
221/*
222 * Find the first line that starts with string, linearly searching from front
223 * to back.
224 *
225 * Return NULL for no such line.
226 *
227 * This routine assumes:
228 *
229 *	o front points at the first character in a line.
230 *	o front is before or at the first line to be printed.
231 */
232char *
233linear_search(char *string, char *front, char *back)
234{
235	while (front < back) {
236		switch (compare(string, front, back)) {
237		case EQUAL:		/* Found it. */
238			return (front);
239			break;
240		case LESS:		/* No such string. */
241			return (NULL);
242			break;
243		case GREATER:		/* Keep going. */
244			break;
245		}
246		SKIP_PAST_NEWLINE(front, back);
247	}
248	return (NULL);
249}
250
251/*
252 * Print as many lines as match string, starting at front.
253 */
254void
255print_from(char *string, char *front, char *back)
256{
257	for (; front < back && compare(string, front, back) == EQUAL; ++front) {
258		for (; front < back && *front != '\n'; ++front)
259			if (putchar(*front) == EOF)
260				err(2, "stdout");
261		if (putchar('\n') == EOF)
262			err(2, "stdout");
263	}
264}
265
266/*
267 * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
268 * string2 (s1 ??? s2).
269 *
270 *	o Matches up to len(s1) are EQUAL.
271 *	o Matches up to len(s2) are GREATER.
272 *
273 * Compare understands about the -f and -d flags, and treats comparisons
274 * appropriately.
275 *
276 * The string "s1" is null terminated.  The string s2 is '\n' terminated (or
277 * "back" terminated).
278 */
279int
280compare(char *s1, char *s2, char *back)
281{
282	int ch;
283
284	for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
285		ch = *s2;
286		if (fflag)
287			ch = tolower((unsigned char)ch);
288		if (dflag && !isalnum((unsigned char)ch)) {
289			++s2;		/* Ignore character in comparison. */
290			continue;
291		}
292		if (*s1 != ch)
293			return (*s1 < ch ? LESS : GREATER);
294	}
295	return (*s1 ? GREATER : EQUAL);
296}
297
298void
299usage(void)
300{
301	(void)fprintf(stderr,
302	    "usage: look [-df] [-t termchar] string [file]\n");
303	exit(2);
304}
305