look.c revision 99112
190075Sobrien/*- 290075Sobrien * Copyright (c) 1991, 1993 3110611Skan * The Regents of the University of California. All rights reserved. 490075Sobrien * 590075Sobrien * This code is derived from software contributed to Berkeley by 690075Sobrien * David Hitz of Auspex Systems, Inc. 790075Sobrien * 890075Sobrien * Redistribution and use in source and binary forms, with or without 990075Sobrien * modification, are permitted provided that the following conditions 1090075Sobrien * are met: 1190075Sobrien * 1. Redistributions of source code must retain the above copyright 1290075Sobrien * notice, this list of conditions and the following disclaimer. 1390075Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1490075Sobrien * notice, this list of conditions and the following disclaimer in the 1590075Sobrien * documentation and/or other materials provided with the distribution. 1690075Sobrien * 3. All advertising materials mentioning features or use of this software 1790075Sobrien * must display the following acknowledgement: 1890075Sobrien * This product includes software developed by the University of 1990075Sobrien * California, Berkeley and its contributors. 2090075Sobrien * 4. Neither the name of the University nor the names of its contributors 2190075Sobrien * may be used to endorse or promote products derived from this software 2290075Sobrien * without specific prior written permission. 2390075Sobrien * 2490075Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2590075Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2690075Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2790075Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2896263Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2996263Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3090075Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3190075Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3296263Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3390075Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3490075Sobrien * SUCH DAMAGE. 3590075Sobrien */ 3690075Sobrien 3790075Sobrien#ifndef lint 3890075Sobrienstatic const char copyright[] = 3990075Sobrien"@(#) Copyright (c) 1991, 1993\n\ 4090075Sobrien The Regents of the University of California. All rights reserved.\n"; 4190075Sobrien#endif /* not lint */ 4290075Sobrien 4390075Sobrien#ifndef lint 4490075Sobrien#if 0 4590075Sobrienstatic char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95"; 4690075Sobrien#endif 4790075Sobrien#endif /* not lint */ 4890075Sobrien#include <sys/cdefs.h> 4990075Sobrien__FBSDID("$FreeBSD: head/usr.bin/look/look.c 99112 2002-06-30 05:25:07Z obrien $"); 5090075Sobrien 5190075Sobrien/* 5290075Sobrien * look -- find lines in a sorted list. 5390075Sobrien * 5490075Sobrien * The man page said that TABs and SPACEs participate in -d comparisons. 5590075Sobrien * In fact, they were ignored. This implements historic practice, not 5690075Sobrien * the manual page. 5790075Sobrien */ 5896263Sobrien 59107590Sobrien#include <sys/types.h> 6090075Sobrien#include <sys/mman.h> 61107590Sobrien#include <sys/stat.h> 62107590Sobrien 63107590Sobrien#include <ctype.h> 64107590Sobrien#include <err.h> 65107590Sobrien#include <errno.h> 66107590Sobrien#include <fcntl.h> 67107590Sobrien#include <limits.h> 68107590Sobrien#include <locale.h> 69107590Sobrien#include <stdio.h> 70107590Sobrien#include <stdlib.h> 71107590Sobrien#include <string.h> 72107590Sobrien#include <unistd.h> 73107590Sobrien 74107590Sobrien#include "pathnames.h" 75107590Sobrien 76107590Sobrienstatic char _path_words[] = _PATH_WORDS; 77107590Sobrien 78107590Sobrien/* 79107590Sobrien * FOLD and DICT convert characters to a normal form for comparison, 80107590Sobrien * according to the user specified flags. 81107590Sobrien * 82107590Sobrien * DICT expects integers because it uses a non-character value to 83107590Sobrien * indicate a character which should not participate in comparisons. 8490075Sobrien */ 8590075Sobrien#define EQUAL 0 8690075Sobrien#define GREATER 1 8790075Sobrien#define LESS (-1) 8890075Sobrien#define NO_COMPARE (-2) 8990075Sobrien 9090075Sobrien#define FOLD(c) (isupper(c) ? tolower(c) : (unsigned char) (c)) 9190075Sobrien#define DICT(c) (isalnum(c) ? (c) & 0xFF /* int */ : NO_COMPARE) 9290075Sobrien 9390075Sobrienint dflag, fflag; 9490075Sobrien 9590075Sobrienchar *binary_search(unsigned char *, unsigned char *, unsigned char *); 9690075Sobrienint compare(unsigned char *, unsigned char *, unsigned char *); 9790075Sobrienchar *linear_search(unsigned char *, unsigned char *, unsigned char *); 98107590Sobrienint look(unsigned char *, unsigned char *, unsigned char *); 9990075Sobrienvoid print_from(unsigned char *, unsigned char *, unsigned char *); 10090075Sobrien 10190075Sobrienstatic void usage(void); 10290075Sobrien 103107590Sobrienint 10490075Sobrienmain(argc, argv) 10590075Sobrien int argc; 10690075Sobrien char *argv[]; 10790075Sobrien{ 10890075Sobrien struct stat sb; 10990075Sobrien int ch, fd, termchar, match; 11090075Sobrien unsigned char *back, *front, *string, *p; 11190075Sobrien unsigned const char *file; 11290075Sobrien 11390075Sobrien (void) setlocale(LC_CTYPE, ""); 11490075Sobrien 11590075Sobrien file = _path_words; 11690075Sobrien termchar = '\0'; 11790075Sobrien while ((ch = getopt(argc, argv, "dft:")) != -1) 11890075Sobrien switch(ch) { 11990075Sobrien case 'd': 12090075Sobrien dflag = 1; 12190075Sobrien break; 12290075Sobrien case 'f': 12390075Sobrien fflag = 1; 12490075Sobrien break; 12590075Sobrien case 't': 12690075Sobrien termchar = *optarg; 12790075Sobrien break; 12890075Sobrien case '?': 12990075Sobrien default: 13090075Sobrien usage(); 13190075Sobrien } 13290075Sobrien argc -= optind; 13390075Sobrien argv += optind; 13490075Sobrien 13590075Sobrien if (argc == 0) 13690075Sobrien usage(); 13790075Sobrien if (argc == 1) /* But set -df by default. */ 13890075Sobrien dflag = fflag = 1; 13990075Sobrien string = *argv++; 14090075Sobrien if (argc >= 2) 14190075Sobrien file = *argv++; 14290075Sobrien 14390075Sobrien if (termchar != '\0' && (p = strchr(string, termchar)) != NULL) 14490075Sobrien *++p = '\0'; 14590075Sobrien match = 1; 14690075Sobrien 14790075Sobrien do { 14890075Sobrien if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb)) 14990075Sobrien err(2, "%s", file); 15090075Sobrien if (sb.st_size > SIZE_T_MAX) 15190075Sobrien errx(2, "%s: %s", file, strerror(EFBIG)); 15290075Sobrien if ((front = mmap(NULL, (size_t)sb.st_size, PROT_READ, MAP_SHARED, fd, (off_t)0)) == MAP_FAILED) 15390075Sobrien err(2, "%s", file); 15490075Sobrien back = front + sb.st_size; 15590075Sobrien match *= (look(string, front, back)); 15690075Sobrien close(fd); 15790075Sobrien } while (argc-- > 2 && (file = *argv++)); 15890075Sobrien 15990075Sobrien exit(match); 16090075Sobrien} 16190075Sobrien 16290075Sobrienint 16390075Sobrienlook(string, front, back) 16490075Sobrien unsigned char *string, *front, *back; 16590075Sobrien{ 16690075Sobrien register int ch; 16790075Sobrien register unsigned char *readp, *writep; 16890075Sobrien 16990075Sobrien /* Reformat string string to avoid doing it multiple times later. */ 17090075Sobrien for (readp = writep = string; (ch = *readp++);) { 17190075Sobrien if (fflag) 17290075Sobrien ch = FOLD(ch); 17390075Sobrien if (dflag) 17490075Sobrien ch = DICT(ch); 17590075Sobrien if (ch != NO_COMPARE) 17690075Sobrien *(writep++) = ch; 17790075Sobrien } 17890075Sobrien *writep = '\0'; 17990075Sobrien 18090075Sobrien front = binary_search(string, front, back); 18190075Sobrien front = linear_search(string, front, back); 18290075Sobrien 18390075Sobrien if (front) 18490075Sobrien print_from(string, front, back); 18590075Sobrien return (front ? 0 : 1); 18690075Sobrien} 18790075Sobrien 18890075Sobrien 18990075Sobrien/* 19090075Sobrien * Binary search for "string" in memory between "front" and "back". 19190075Sobrien * 19290075Sobrien * This routine is expected to return a pointer to the start of a line at 19390075Sobrien * *or before* the first word matching "string". Relaxing the constraint 19490075Sobrien * this way simplifies the algorithm. 19590075Sobrien * 19690075Sobrien * Invariants: 19790075Sobrien * front points to the beginning of a line at or before the first 19890075Sobrien * matching string. 19990075Sobrien * 20090075Sobrien * back points to the beginning of a line at or after the first 20190075Sobrien * matching line. 20290075Sobrien * 20390075Sobrien * Base of the Invariants. 20490075Sobrien * front = NULL; 20590075Sobrien * back = EOF; 20690075Sobrien * 20790075Sobrien * Advancing the Invariants: 20890075Sobrien * 20990075Sobrien * p = first newline after halfway point from front to back. 21090075Sobrien * 21190075Sobrien * If the string at "p" is not greater than the string to match, 21290075Sobrien * p is the new front. Otherwise it is the new back. 21390075Sobrien * 21490075Sobrien * Termination: 21590075Sobrien * 21690075Sobrien * The definition of the routine allows it return at any point, 21790075Sobrien * since front is always at or before the line to print. 21890075Sobrien * 21990075Sobrien * In fact, it returns when the chosen "p" equals "back". This 22090075Sobrien * implies that there exists a string is least half as long as 22190075Sobrien * (back - front), which in turn implies that a linear search will 22290075Sobrien * be no more expensive than the cost of simply printing a string or two. 22390075Sobrien * 22490075Sobrien * Trying to continue with binary search at this point would be 22590075Sobrien * more trouble than it's worth. 22690075Sobrien */ 22790075Sobrien#define SKIP_PAST_NEWLINE(p, back) \ 22890075Sobrien while (p < back && *p++ != '\n'); 22990075Sobrien 23090075Sobrienchar * 23190075Sobrienbinary_search(string, front, back) 23290075Sobrien register unsigned char *string, *front, *back; 23390075Sobrien{ 23490075Sobrien register unsigned char *p; 23590075Sobrien 23690075Sobrien p = front + (back - front) / 2; 23790075Sobrien SKIP_PAST_NEWLINE(p, back); 23890075Sobrien 23996263Sobrien /* 24096263Sobrien * If the file changes underneath us, make sure we don't 24196263Sobrien * infinitely loop. 24296263Sobrien */ 24396263Sobrien while (p < back && back > front) { 24496263Sobrien if (compare(string, p, back) == GREATER) 24596263Sobrien front = p; 24696263Sobrien else 24796263Sobrien back = p; 24896263Sobrien p = front + (back - front) / 2; 24996263Sobrien SKIP_PAST_NEWLINE(p, back); 25096263Sobrien } 25196263Sobrien return (front); 25296263Sobrien} 25396263Sobrien 25496263Sobrien/* 25596263Sobrien * Find the first line that starts with string, linearly searching from front 25696263Sobrien * to back. 25796263Sobrien * 25896263Sobrien * Return NULL for no such line. 25996263Sobrien * 26096263Sobrien * This routine assumes: 26196263Sobrien * 26296263Sobrien * o front points at the first character in a line. 26396263Sobrien * o front is before or at the first line to be printed. 26496263Sobrien */ 26590075Sobrienchar * 26690075Sobrienlinear_search(string, front, back) 26790075Sobrien unsigned char *string, *front, *back; 26890075Sobrien{ 26990075Sobrien while (front < back) { 27090075Sobrien switch (compare(string, front, back)) { 27190075Sobrien case EQUAL: /* Found it. */ 27290075Sobrien return (front); 27390075Sobrien break; 27490075Sobrien case LESS: /* No such string. */ 27590075Sobrien return (NULL); 27690075Sobrien break; 27790075Sobrien case GREATER: /* Keep going. */ 27890075Sobrien break; 27990075Sobrien } 28090075Sobrien SKIP_PAST_NEWLINE(front, back); 28190075Sobrien } 28290075Sobrien return (NULL); 28390075Sobrien} 28490075Sobrien 28590075Sobrien/* 28690075Sobrien * Print as many lines as match string, starting at front. 28790075Sobrien */ 28890075Sobrienvoid 28990075Sobrienprint_from(string, front, back) 29090075Sobrien register unsigned char *string, *front, *back; 29190075Sobrien{ 29290075Sobrien for (; front < back && compare(string, front, back) == EQUAL; ++front) { 29390075Sobrien for (; front < back && *front != '\n'; ++front) 29490075Sobrien if (putchar(*front) == EOF) 29590075Sobrien err(2, "stdout"); 29690075Sobrien if (putchar('\n') == EOF) 29790075Sobrien err(2, "stdout"); 29890075Sobrien } 29990075Sobrien} 30090075Sobrien 30190075Sobrien/* 30290075Sobrien * Return LESS, GREATER, or EQUAL depending on how the string1 compares with 30390075Sobrien * string2 (s1 ??? s2). 30490075Sobrien * 30590075Sobrien * o Matches up to len(s1) are EQUAL. 30690075Sobrien * o Matches up to len(s2) are GREATER. 30790075Sobrien * 30890075Sobrien * Compare understands about the -f and -d flags, and treats comparisons 30990075Sobrien * appropriately. 31090075Sobrien * 31190075Sobrien * The string "s1" is null terminated. The string s2 is '\n' terminated (or 31290075Sobrien * "back" terminated). 31390075Sobrien */ 31490075Sobrienint 31596263Sobriencompare(s1, s2, back) 31690075Sobrien register unsigned char *s1, *s2, *back; 31796263Sobrien{ 31896263Sobrien register int ch; 31990075Sobrien 32096263Sobrien for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) { 32196263Sobrien ch = *s2; 32296263Sobrien if (fflag) 32396263Sobrien ch = FOLD(ch); 32490075Sobrien if (dflag) 32596263Sobrien ch = DICT(ch); 32696263Sobrien 32796263Sobrien if (ch == NO_COMPARE) { 32896263Sobrien ++s2; /* Ignore character in comparison. */ 32996263Sobrien continue; 33096263Sobrien } 331110611Skan if (*s1 != ch) 332110611Skan return (*s1 < ch ? LESS : GREATER); 333110611Skan } 334110611Skan return (*s1 ? GREATER : EQUAL); 335110611Skan} 33696263Sobrien 33790075Sobrienstatic void 33890075Sobrienusage() 33990075Sobrien{ 34090075Sobrien (void)fprintf(stderr, "usage: look [-df] [-t char] string [file ...]\n"); 34190075Sobrien exit(2); 34290075Sobrien} 34390075Sobrien