1/* $OpenBSD: grep.c,v 1.67 2022/06/26 18:48:10 op Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/types.h> 30#include <sys/stat.h> 31#include <sys/queue.h> 32 33#include <ctype.h> 34#include <err.h> 35#include <errno.h> 36#include <getopt.h> 37#include <regex.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <unistd.h> 42 43#include "grep.h" 44 45/* Flags passed to regcomp() and regexec() */ 46int cflags; 47int eflags = REG_STARTEND; 48 49int matchall; /* shortcut */ 50int patterns, pattern_sz; 51char **pattern; 52regex_t *r_pattern; 53fastgrep_t *fg_pattern; 54 55/* For regex errors */ 56char re_error[RE_ERROR_BUF + 1]; 57 58/* Command-line flags */ 59int Aflag; /* -A x: print x lines trailing each match */ 60int Bflag; /* -B x: print x lines leading each match */ 61int Eflag; /* -E: interpret pattern as extended regexp */ 62int Fflag; /* -F: interpret pattern as list of fixed strings */ 63int Hflag; /* -H: always print filename header */ 64int Lflag; /* -L: only show names of files with no matches */ 65int Rflag; /* -R: recursively search directory trees */ 66int Zflag; /* -Z: decompress input before processing */ 67int bflag; /* -b: show block numbers for each match */ 68int cflag; /* -c: only show a count of matching lines */ 69int hflag; /* -h: don't print filename headers */ 70int iflag; /* -i: ignore case */ 71int lflag; /* -l: only show names of files with matches */ 72int mflag; /* -m x: stop reading the files after x matches */ 73long long mcount; /* count for -m */ 74long long mlimit; /* requested value for -m */ 75int nflag; /* -n: show line numbers in front of matching lines */ 76int oflag; /* -o: print each match */ 77int qflag; /* -q: quiet mode (don't output anything) */ 78int sflag; /* -s: silent mode (ignore errors) */ 79int vflag; /* -v: only show non-matching lines */ 80int wflag; /* -w: pattern must start and end on word boundaries */ 81int xflag; /* -x: pattern must match entire line */ 82int lbflag; /* --line-buffered */ 83int nullflag; /* --null */ 84const char *labelname; /* --label=name */ 85 86int binbehave = BIN_FILE_BIN; 87 88enum { 89 BIN_OPT = CHAR_MAX + 1, 90 HELP_OPT, 91 MMAP_OPT, 92 LINEBUF_OPT, 93 NULL_OPT, 94 LABEL_OPT, 95}; 96 97/* Housekeeping */ 98int first; /* flag whether or not this is our first match */ 99int tail; /* lines left to print */ 100int file_err; /* file reading error */ 101 102struct patfile { 103 const char *pf_file; 104 SLIST_ENTRY(patfile) pf_next; 105}; 106SLIST_HEAD(, patfile) patfilelh; 107 108extern char *__progname; 109 110static void 111usage(void) 112{ 113 fprintf(stderr, 114#ifdef NOZ 115 "usage: %s [-abcEFGHhIiLlnoqRsUVvwx] [-A num] [-B num] [-C[num]]" 116#else 117 "usage: %s [-abcEFGHhIiLlnoqRsUVvwxZ] [-A num] [-B num] [-C[num]]" 118#endif 119 " [-e pattern]\n" 120 "\t[-f file] [-m num] [--binary-files=value] [--context[=num]]\n" 121 "\t[--label=name] [--line-buffered] [--null] [pattern]" 122 " [file ...]\n", 123 __progname); 124 exit(2); 125} 126 127#ifdef NOZ 128static const char optstr[] = "0123456789A:B:CEFGHILRUVabce:f:hilm:noqrsuvwxy"; 129#else 130static const char optstr[] = "0123456789A:B:CEFGHILRUVZabce:f:hilm:noqrsuvwxy"; 131#endif 132 133static const struct option long_options[] = 134{ 135 {"binary-files", required_argument, NULL, BIN_OPT}, 136 {"help", no_argument, NULL, HELP_OPT}, 137 {"mmap", no_argument, NULL, MMAP_OPT}, 138 {"label", required_argument, NULL, LABEL_OPT}, 139 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 140 {"null", no_argument, NULL, NULL_OPT}, 141 {"after-context", required_argument, NULL, 'A'}, 142 {"before-context", required_argument, NULL, 'B'}, 143 {"context", optional_argument, NULL, 'C'}, 144 {"devices", required_argument, NULL, 'D'}, 145 {"extended-regexp", no_argument, NULL, 'E'}, 146 {"fixed-strings", no_argument, NULL, 'F'}, 147 {"basic-regexp", no_argument, NULL, 'G'}, 148 {"with-filename", no_argument, NULL, 'H'}, 149 {"binary", no_argument, NULL, 'U'}, 150 {"version", no_argument, NULL, 'V'}, 151 {"text", no_argument, NULL, 'a'}, 152 {"byte-offset", no_argument, NULL, 'b'}, 153 {"count", no_argument, NULL, 'c'}, 154 {"regexp", required_argument, NULL, 'e'}, 155 {"file", required_argument, NULL, 'f'}, 156 {"no-filename", no_argument, NULL, 'h'}, 157 {"ignore-case", no_argument, NULL, 'i'}, 158 {"files-without-match", no_argument, NULL, 'L'}, 159 {"files-with-matches", no_argument, NULL, 'l'}, 160 {"max-count", required_argument, NULL, 'm'}, 161 {"line-number", no_argument, NULL, 'n'}, 162 {"quiet", no_argument, NULL, 'q'}, 163 {"silent", no_argument, NULL, 'q'}, 164 {"recursive", no_argument, NULL, 'r'}, 165 {"no-messages", no_argument, NULL, 's'}, 166 {"revert-match", no_argument, NULL, 'v'}, 167 {"word-regexp", no_argument, NULL, 'w'}, 168 {"line-regexp", no_argument, NULL, 'x'}, 169 {"unix-byte-offsets", no_argument, NULL, 'u'}, 170#ifndef NOZ 171 {"decompress", no_argument, NULL, 'Z'}, 172#endif 173 {NULL, no_argument, NULL, 0} 174}; 175 176 177static void 178add_pattern(char *pat, size_t len) 179{ 180 if (!xflag && (len == 0 || matchall)) { 181 matchall = 1; 182 return; 183 } 184 if (patterns == pattern_sz) { 185 pattern_sz *= 2; 186 pattern = grep_reallocarray(pattern, ++pattern_sz, sizeof(*pattern)); 187 } 188 if (len > 0 && pat[len - 1] == '\n') 189 --len; 190 /* pat may not be NUL-terminated */ 191 if (wflag && !Fflag) { 192 int bol = 0, eol = 0, extra; 193 if (pat[0] == '^') 194 bol = 1; 195 if (len > 0 && pat[len - 1] == '$') 196 eol = 1; 197 extra = Eflag ? 2 : 4; 198 pattern[patterns] = grep_malloc(len + 15 + extra); 199 snprintf(pattern[patterns], len + 15 + extra, 200 "%s[[:<:]]%s%.*s%s[[:>:]]%s", 201 bol ? "^" : "", 202 Eflag ? "(" : "\\(", 203 (int)len - bol - eol, pat + bol, 204 Eflag ? ")" : "\\)", 205 eol ? "$" : ""); 206 len += 14 + extra; 207 } else { 208 pattern[patterns] = grep_malloc(len + 1); 209 memcpy(pattern[patterns], pat, len); 210 pattern[patterns][len] = '\0'; 211 } 212 ++patterns; 213} 214 215static void 216add_patterns(char *pats) 217{ 218 char *nl; 219 220 while ((nl = strchr(pats, '\n')) != NULL) { 221 add_pattern(pats, nl - pats); 222 pats = nl + 1; 223 } 224 add_pattern(pats, strlen(pats)); 225} 226 227static void 228read_patterns(const char *fn) 229{ 230 FILE *f; 231 char *line; 232 ssize_t len; 233 size_t linesize; 234 235 if ((f = fopen(fn, "r")) == NULL) 236 err(2, "%s", fn); 237 line = NULL; 238 linesize = 0; 239 while ((len = getline(&line, &linesize, f)) != -1) 240 add_pattern(line, *line == '\n' ? 0 : len); 241 if (ferror(f)) 242 err(2, "%s", fn); 243 fclose(f); 244 free(line); 245} 246 247int 248main(int argc, char *argv[]) 249{ 250 int c, lastc, prevoptind, newarg, i, needpattern, exprs, expr_sz; 251 struct patfile *patfile, *pf_next; 252 long l; 253 char **expr; 254 const char *errstr; 255 256 if (pledge("stdio rpath", NULL) == -1) 257 err(2, "pledge"); 258 259 SLIST_INIT(&patfilelh); 260 switch (__progname[0]) { 261 case 'e': 262 Eflag = 1; 263 break; 264 case 'f': 265 Fflag = 1; 266 break; 267#ifndef NOZ 268 case 'z': 269 Zflag = 1; 270 switch(__progname[1]) { 271 case 'e': 272 Eflag = 1; 273 break; 274 case 'f': 275 Fflag = 1; 276 break; 277 } 278 break; 279#endif 280 } 281 282 lastc = '\0'; 283 newarg = 1; 284 prevoptind = 1; 285 needpattern = 1; 286 expr_sz = exprs = 0; 287 expr = NULL; 288 while ((c = getopt_long(argc, argv, optstr, 289 long_options, NULL)) != -1) { 290 switch (c) { 291 case '0': case '1': case '2': case '3': case '4': 292 case '5': case '6': case '7': case '8': case '9': 293 if (newarg || !isdigit(lastc)) 294 Aflag = 0; 295 else if (Aflag > INT_MAX / 10) 296 errx(2, "context out of range"); 297 Aflag = Bflag = (Aflag * 10) + (c - '0'); 298 break; 299 case 'A': 300 case 'B': 301 l = strtonum(optarg, 1, INT_MAX, &errstr); 302 if (errstr != NULL) 303 errx(2, "context %s", errstr); 304 if (c == 'A') 305 Aflag = (int)l; 306 else 307 Bflag = (int)l; 308 break; 309 case 'C': 310 if (optarg == NULL) 311 Aflag = Bflag = 2; 312 else { 313 l = strtonum(optarg, 1, INT_MAX, &errstr); 314 if (errstr != NULL) 315 errx(2, "context %s", errstr); 316 Aflag = Bflag = (int)l; 317 } 318 break; 319 case 'E': 320 Fflag = 0; 321 Eflag = 1; 322 break; 323 case 'F': 324 Eflag = 0; 325 Fflag = 1; 326 break; 327 case 'G': 328 Eflag = Fflag = 0; 329 break; 330 case 'H': 331 Hflag = 1; 332 break; 333 case 'I': 334 binbehave = BIN_FILE_SKIP; 335 break; 336 case 'L': 337 lflag = 0; 338 Lflag = qflag = 1; 339 break; 340 case 'R': 341 case 'r': 342 Rflag = 1; 343 break; 344 case 'U': 345 binbehave = BIN_FILE_BIN; 346 break; 347 case 'V': 348 fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN); 349 exit(0); 350 break; 351#ifndef NOZ 352 case 'Z': 353 Zflag = 1; 354 break; 355#endif 356 case 'a': 357 binbehave = BIN_FILE_TEXT; 358 break; 359 case 'b': 360 bflag = 1; 361 break; 362 case 'c': 363 cflag = 1; 364 break; 365 case 'e': 366 /* defer adding of expressions until all arguments are parsed */ 367 if (exprs == expr_sz) { 368 expr_sz *= 2; 369 expr = grep_reallocarray(expr, ++expr_sz, 370 sizeof(*expr)); 371 } 372 needpattern = 0; 373 expr[exprs] = optarg; 374 ++exprs; 375 break; 376 case 'f': 377 patfile = grep_malloc(sizeof(*patfile)); 378 patfile->pf_file = optarg; 379 SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next); 380 needpattern = 0; 381 break; 382 case 'h': 383 hflag = 1; 384 break; 385 case 'i': 386 case 'y': 387 iflag = 1; 388 cflags |= REG_ICASE; 389 break; 390 case 'l': 391 Lflag = 0; 392 lflag = qflag = 1; 393 break; 394 case 'm': 395 mflag = 1; 396 mlimit = mcount = strtonum(optarg, 0, LLONG_MAX, 397 &errstr); 398 if (errstr != NULL) 399 errx(2, "invalid max-count %s: %s", 400 optarg, errstr); 401 break; 402 case 'n': 403 nflag = 1; 404 break; 405 case 'o': 406 oflag = 1; 407 break; 408 case 'q': 409 qflag = 1; 410 break; 411 case 's': 412 sflag = 1; 413 break; 414 case 'v': 415 vflag = 1; 416 break; 417 case 'w': 418 wflag = 1; 419 break; 420 case 'x': 421 xflag = 1; 422 break; 423 case BIN_OPT: 424 if (strcmp("binary", optarg) == 0) 425 binbehave = BIN_FILE_BIN; 426 else if (strcmp("without-match", optarg) == 0) 427 binbehave = BIN_FILE_SKIP; 428 else if (strcmp("text", optarg) == 0) 429 binbehave = BIN_FILE_TEXT; 430 else 431 errx(2, "Unknown binary-files option"); 432 break; 433 case 'u': 434 case MMAP_OPT: 435 /* default, compatibility */ 436 break; 437 case LABEL_OPT: 438 labelname = optarg; 439 break; 440 case LINEBUF_OPT: 441 lbflag = 1; 442 break; 443 case NULL_OPT: 444 nullflag = 1; 445 break; 446 case HELP_OPT: 447 default: 448 usage(); 449 } 450 lastc = c; 451 newarg = optind != prevoptind; 452 prevoptind = optind; 453 } 454 argc -= optind; 455 argv += optind; 456 457 for (i = 0; i < exprs; i++) 458 add_patterns(expr[i]); 459 free(expr); 460 expr = NULL; 461 462 for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL; 463 patfile = pf_next) { 464 pf_next = SLIST_NEXT(patfile, pf_next); 465 read_patterns(patfile->pf_file); 466 free(patfile); 467 } 468 469 if (argc == 0 && needpattern) 470 usage(); 471 472 if (argc != 0 && needpattern) { 473 add_patterns(*argv); 474 --argc; 475 ++argv; 476 } 477 if (argc == 1 && strcmp(*argv, "-") == 0) { 478 /* stdin */ 479 --argc; 480 ++argv; 481 } 482 483 if (Eflag) 484 cflags |= REG_EXTENDED; 485 if (Fflag) 486 cflags |= REG_NOSPEC; 487#ifdef SMALL 488 /* Sorry, this won't work */ 489 if (Fflag && wflag) 490 errx(1, "Can't use small fgrep with -w"); 491#endif 492 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 493 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 494 for (i = 0; i < patterns; ++i) { 495 /* Check if cheating is allowed (always is for fgrep). */ 496#ifndef SMALL 497 if (Fflag) { 498 fgrepcomp(&fg_pattern[i], pattern[i]); 499 } else 500#endif 501 { 502 if (fastcomp(&fg_pattern[i], pattern[i])) { 503 /* Fall back to full regex library */ 504 c = regcomp(&r_pattern[i], pattern[i], cflags); 505 if (c != 0) { 506 regerror(c, &r_pattern[i], re_error, 507 RE_ERROR_BUF); 508 errx(2, "%s", re_error); 509 } 510 } 511 } 512 } 513 514 if (lbflag) 515 setvbuf(stdout, NULL, _IOLBF, 0); 516 517 if ((argc == 0 || argc == 1) && !Rflag && !Hflag) 518 hflag = 1; 519 520 if (argc == 0 && !Rflag) 521 exit(!procfile(NULL)); 522 523 if (Rflag) 524 c = grep_tree(argv); 525 else 526 for (c = 0; argc--; ++argv) 527 c |= procfile(*argv); 528 529 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1)); 530} 531