look.c revision 11900
1211088Smjacob/*- 2211088Smjacob * Copyright (c) 1991, 1993 3211088Smjacob * The Regents of the University of California. All rights reserved. 4211088Smjacob * 5211088Smjacob * This code is derived from software contributed to Berkeley by 6211088Smjacob * David Hitz of Auspex Systems, Inc. 7211088Smjacob * 8211088Smjacob * Redistribution and use in source and binary forms, with or without 9211088Smjacob * modification, are permitted provided that the following conditions 10211088Smjacob * are met: 11211088Smjacob * 1. Redistributions of source code must retain the above copyright 12211088Smjacob * notice, this list of conditions and the following disclaimer. 13211088Smjacob * 2. Redistributions in binary form must reproduce the above copyright 14211088Smjacob * notice, this list of conditions and the following disclaimer in the 15211088Smjacob * documentation and/or other materials provided with the distribution. 16211088Smjacob * 3. All advertising materials mentioning features or use of this software 17211088Smjacob * must display the following acknowledgement: 18211088Smjacob * This product includes software developed by the University of 19211088Smjacob * California, Berkeley and its contributors. 20211088Smjacob * 4. Neither the name of the University nor the names of its contributors 21211088Smjacob * may be used to endorse or promote products derived from this software 22211088Smjacob * without specific prior written permission. 23211088Smjacob * 24211088Smjacob * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25211088Smjacob * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26211088Smjacob * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27211088Smjacob * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28211088Smjacob * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29211088Smjacob * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30211088Smjacob * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31211088Smjacob * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32211088Smjacob * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33211088Smjacob * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34211183Smjacob * SUCH DAMAGE. 35211183Smjacob */ 36211183Smjacob 37211183Smjacob#ifndef lint 38211183Smjacobstatic char copyright[] = 39211183Smjacob"@(#) Copyright (c) 1991, 1993\n\ 40211183Smjacob The Regents of the University of California. All rights reserved.\n"; 41211183Smjacob#endif /* not lint */ 42211183Smjacob 43211183Smjacob#ifndef lint 44211183Smjacobstatic char sccsid[] = "@(#)look.c 8.1 (Berkeley) 6/14/93"; 45211088Smjacob#endif /* not lint */ 46211088Smjacob 47211088Smjacob/* 48211088Smjacob * look -- find lines in a sorted list. 49211088Smjacob * 50211088Smjacob * The man page said that TABs and SPACEs participate in -d comparisons. 51211088Smjacob * In fact, they were ignored. This implements historic practice, not 52211088Smjacob * the manual page. 53211088Smjacob */ 54211088Smjacob 55211088Smjacob#include <sys/types.h> 56211088Smjacob#include <sys/mman.h> 57211088Smjacob#include <sys/stat.h> 58211088Smjacob 59211088Smjacob#include <limits.h> 60211088Smjacob#include <locale.h> 61211088Smjacob#include <errno.h> 62211088Smjacob#include <fcntl.h> 63211183Smjacob#include <stdio.h> 64211183Smjacob#include <stdlib.h> 65211088Smjacob#include <string.h> 66211088Smjacob#include <ctype.h> 67211183Smjacob#include "pathnames.h" 68211088Smjacob 69211088Smjacob/* 70211088Smjacob * FOLD and DICT convert characters to a normal form for comparison, 71211088Smjacob * according to the user specified flags. 72211088Smjacob * 73211088Smjacob * DICT expects integers because it uses a non-character value to 74211088Smjacob * indicate a character which should not participate in comparisons. 75211088Smjacob */ 76211183Smjacob#define EQUAL 0 77211088Smjacob#define GREATER 1 78211088Smjacob#define LESS (-1) 79211088Smjacob#define NO_COMPARE (-2) 80211088Smjacob 81211088Smjacob#define FOLD(c) (isupper(c) ? tolower(c) : (unsigned char) (c)) 82211088Smjacob#define DICT(c) (isalnum(c) ? (c) & 0xFF /* int */ : NO_COMPARE) 83211183Smjacob 84211088Smjacobint dflag, fflag; 85211183Smjacob 86211183Smjacobchar *binary_search __P((unsigned char *, unsigned char *, unsigned char *)); 87211088Smjacobint compare __P((unsigned char *, unsigned char *, unsigned char *)); 88211088Smjacobvoid err __P((const char *fmt, ...)); 89211088Smjacobchar *linear_search __P((unsigned char *, unsigned char *, unsigned char *)); 90211088Smjacobint look __P((unsigned char *, unsigned char *, unsigned char *)); 91211088Smjacobvoid print_from __P((unsigned char *, unsigned char *, unsigned char *)); 92211088Smjacob 93211088Smjacobstatic void usage __P((void)); 94211088Smjacob 95211088Smjacobmain(argc, argv) 96211088Smjacob int argc; 97211088Smjacob char *argv[]; 98211088Smjacob{ 99211088Smjacob struct stat sb; 100211088Smjacob int ch, fd, termchar; 101211088Smjacob unsigned char *back, *file, *front, *string, *p; 102211088Smjacob 103211088Smjacob (void) setlocale(LC_CTYPE, ""); 104211088Smjacob 105211088Smjacob file = _PATH_WORDS; 106211088Smjacob termchar = '\0'; 107211088Smjacob while ((ch = getopt(argc, argv, "dft:")) != EOF) 108211088Smjacob switch(ch) { 109211088Smjacob case 'd': 110211183Smjacob dflag = 1; 111211088Smjacob break; 112211088Smjacob case 'f': 113211088Smjacob fflag = 1; 114211088Smjacob break; 115211088Smjacob case 't': 116211183Smjacob termchar = *optarg; 117211183Smjacob break; 118211183Smjacob case '?': 119211088Smjacob default: 120211183Smjacob usage(); 121211183Smjacob } 122211183Smjacob argc -= optind; 123211183Smjacob argv += optind; 124211183Smjacob 125211183Smjacob switch (argc) { 126211183Smjacob case 2: /* Don't set -df for user. */ 127211088Smjacob string = *argv++; 128211088Smjacob file = *argv; 129211088Smjacob break; 130211088Smjacob case 1: /* But set -df by default. */ 131211088Smjacob dflag = fflag = 1; 132211088Smjacob string = *argv; 133211088Smjacob break; 134211088Smjacob default: 135211088Smjacob usage(); 136211088Smjacob } 137211183Smjacob 138211088Smjacob if (termchar != '\0' && (p = strchr(string, termchar)) != NULL) 139211088Smjacob *++p = '\0'; 140211183Smjacob 141211088Smjacob if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb)) 142211088Smjacob err("%s: %s", file, strerror(errno)); 143211088Smjacob if (sb.st_size > SIZE_T_MAX) 144211088Smjacob err("%s: %s", file, strerror(EFBIG)); 145211088Smjacob if ((front = mmap(NULL, 146211088Smjacob (size_t)sb.st_size, PROT_READ, 0, fd, (off_t)0)) == NULL) 147211088Smjacob err("%s: %s", file, strerror(errno)); 148211088Smjacob back = front + sb.st_size; 149211088Smjacob exit(look(string, front, back)); 150211088Smjacob} 151211183Smjacob 152211183Smjacoblook(string, front, back) 153211183Smjacob unsigned char *string, *front, *back; 154211183Smjacob{ 155211183Smjacob register int ch; 156211088Smjacob register unsigned char *readp, *writep; 157211088Smjacob 158211088Smjacob /* Reformat string string to avoid doing it multiple times later. */ 159211088Smjacob for (readp = writep = string; ch = *readp++;) { 160211088Smjacob if (fflag) 161211088Smjacob ch = FOLD(ch); 162211088Smjacob if (dflag) 163211088Smjacob ch = DICT(ch); 164211088Smjacob if (ch != NO_COMPARE) 165211088Smjacob *(writep++) = ch; 166211088Smjacob } 167211088Smjacob *writep = '\0'; 168211088Smjacob 169211088Smjacob front = binary_search(string, front, back); 170211088Smjacob front = linear_search(string, front, back); 171211088Smjacob 172211088Smjacob if (front) 173211088Smjacob print_from(string, front, back); 174211088Smjacob return (front ? 0 : 1); 175211088Smjacob} 176211088Smjacob 177211088Smjacob 178211088Smjacob/* 179211088Smjacob * Binary search for "string" in memory between "front" and "back". 180211088Smjacob * 181211088Smjacob * This routine is expected to return a pointer to the start of a line at 182211088Smjacob * *or before* the first word matching "string". Relaxing the constraint 183211088Smjacob * this way simplifies the algorithm. 184211088Smjacob * 185211088Smjacob * Invariants: 186211088Smjacob * front points to the beginning of a line at or before the first 187211088Smjacob * matching string. 188211088Smjacob * 189211088Smjacob * back points to the beginning of a line at or after the first 190211088Smjacob * matching line. 191211088Smjacob * 192211088Smjacob * Base of the Invariants. 193211088Smjacob * front = NULL; 194211088Smjacob * back = EOF; 195211088Smjacob * 196211088Smjacob * Advancing the Invariants: 197211088Smjacob * 198211088Smjacob * p = first newline after halfway point from front to back. 199211088Smjacob * 200211088Smjacob * If the string at "p" is not greater than the string to match, 201211088Smjacob * p is the new front. Otherwise it is the new back. 202211088Smjacob * 203211088Smjacob * Termination: 204211088Smjacob * 205211088Smjacob * The definition of the routine allows it return at any point, 206211088Smjacob * since front is always at or before the line to print. 207211088Smjacob * 208211088Smjacob * In fact, it returns when the chosen "p" equals "back". This 209211088Smjacob * implies that there exists a string is least half as long as 210211088Smjacob * (back - front), which in turn implies that a linear search will 211211088Smjacob * be no more expensive than the cost of simply printing a string or two. 212211088Smjacob * 213211088Smjacob * Trying to continue with binary search at this point would be 214211088Smjacob * more trouble than it's worth. 215211088Smjacob */ 216211088Smjacob#define SKIP_PAST_NEWLINE(p, back) \ 217211088Smjacob while (p < back && *p++ != '\n'); 218211088Smjacob 219211088Smjacobchar * 220211088Smjacobbinary_search(string, front, back) 221211088Smjacob register unsigned char *string, *front, *back; 222211088Smjacob{ 223211088Smjacob register unsigned char *p; 224211088Smjacob 225211088Smjacob p = front + (back - front) / 2; 226211088Smjacob SKIP_PAST_NEWLINE(p, back); 227211088Smjacob 228211088Smjacob /* 229211088Smjacob * If the file changes underneath us, make sure we don't 230211088Smjacob * infinitely loop. 231211088Smjacob */ 232211088Smjacob while (p < back && back > front) { 233211088Smjacob if (compare(string, p, back) == GREATER) 234211088Smjacob front = p; 235211088Smjacob else 236211088Smjacob back = p; 237211088Smjacob p = front + (back - front) / 2; 238211088Smjacob SKIP_PAST_NEWLINE(p, back); 239211088Smjacob } 240211088Smjacob return (front); 241211088Smjacob} 242211088Smjacob 243211088Smjacob/* 244211088Smjacob * Find the first line that starts with string, linearly searching from front 245211088Smjacob * to back. 246211088Smjacob * 247211088Smjacob * Return NULL for no such line. 248211088Smjacob * 249211088Smjacob * This routine assumes: 250211088Smjacob * 251211088Smjacob * o front points at the first character in a line. 252211088Smjacob * o front is before or at the first line to be printed. 253211088Smjacob */ 254211088Smjacobchar * 255211088Smjacoblinear_search(string, front, back) 256211088Smjacob unsigned char *string, *front, *back; 257211088Smjacob{ 258211088Smjacob while (front < back) { 259211088Smjacob switch (compare(string, front, back)) { 260211088Smjacob case EQUAL: /* Found it. */ 261211088Smjacob return (front); 262211088Smjacob break; 263211088Smjacob case LESS: /* No such string. */ 264211088Smjacob return (NULL); 265211088Smjacob break; 266211088Smjacob case GREATER: /* Keep going. */ 267211088Smjacob break; 268211088Smjacob } 269211088Smjacob SKIP_PAST_NEWLINE(front, back); 270211088Smjacob } 271211088Smjacob return (NULL); 272211088Smjacob} 273211088Smjacob 274211088Smjacob/* 275211088Smjacob * Print as many lines as match string, starting at front. 276211088Smjacob */ 277211088Smjacobvoid 278211088Smjacobprint_from(string, front, back) 279211088Smjacob register unsigned char *string, *front, *back; 280211088Smjacob{ 281211088Smjacob for (; front < back && compare(string, front, back) == EQUAL; ++front) { 282211088Smjacob for (; front < back && *front != '\n'; ++front) 283211088Smjacob if (putchar(*front) == EOF) 284211088Smjacob err("stdout: %s", strerror(errno)); 285211088Smjacob if (putchar('\n') == EOF) 286211088Smjacob err("stdout: %s", strerror(errno)); 287211088Smjacob } 288211088Smjacob} 289211088Smjacob 290211088Smjacob/* 291211088Smjacob * Return LESS, GREATER, or EQUAL depending on how the string1 compares with 292211088Smjacob * string2 (s1 ??? s2). 293211088Smjacob * 294211088Smjacob * o Matches up to len(s1) are EQUAL. 295211088Smjacob * o Matches up to len(s2) are GREATER. 296211088Smjacob * 297211088Smjacob * Compare understands about the -f and -d flags, and treats comparisons 298211088Smjacob * appropriately. 299211088Smjacob * 300211088Smjacob * The string "s1" is null terminated. The string s2 is '\n' terminated (or 301211088Smjacob * "back" terminated). 302211088Smjacob */ 303211088Smjacobint 304211088Smjacobcompare(s1, s2, back) 305211088Smjacob register unsigned char *s1, *s2, *back; 306211088Smjacob{ 307211088Smjacob register int ch; 308211088Smjacob 309211088Smjacob for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) { 310211088Smjacob ch = *s2; 311211088Smjacob if (fflag) 312211088Smjacob ch = FOLD(ch); 313211088Smjacob if (dflag) 314211088Smjacob ch = DICT(ch); 315211088Smjacob 316211088Smjacob if (ch == NO_COMPARE) { 317211088Smjacob ++s2; /* Ignore character in comparison. */ 318211183Smjacob continue; 319211183Smjacob } 320211183Smjacob if (*s1 != ch) 321211183Smjacob return (*s1 < ch ? LESS : GREATER); 322211183Smjacob } 323211088Smjacob return (*s1 ? GREATER : EQUAL); 324211088Smjacob} 325211088Smjacob 326211088Smjacobstatic void 327211088Smjacobusage() 328211088Smjacob{ 329211088Smjacob (void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n"); 330211088Smjacob exit(2); 331211088Smjacob} 332211088Smjacob 333211088Smjacob#if __STDC__ 334211088Smjacob#include <stdarg.h> 335211088Smjacob#else 336211088Smjacob#include <varargs.h> 337211088Smjacob#endif 338211088Smjacob 339211088Smjacobvoid 340211088Smjacob#if __STDC__ 341211088Smjacoberr(const char *fmt, ...) 342211088Smjacob#else 343211088Smjacoberr(fmt, va_alist) 344211088Smjacob char *fmt; 345211088Smjacob va_dcl 346211088Smjacob#endif 347211088Smjacob{ 348211088Smjacob va_list ap; 349211088Smjacob#if __STDC__ 350211088Smjacob va_start(ap, fmt); 351211088Smjacob#else 352211088Smjacob va_start(ap); 353211088Smjacob#endif 354211088Smjacob (void)fprintf(stderr, "look: "); 355211088Smjacob (void)vfprintf(stderr, fmt, ap); 356211088Smjacob va_end(ap); 357211088Smjacob (void)fprintf(stderr, "\n"); 358211088Smjacob exit(2); 359211088Smjacob /* NOTREACHED */ 360211088Smjacob} 361211088Smjacob