util.c revision 228319
1220422Sgabor/*	$NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $	*/
2220422Sgabor/*	$FreeBSD: head/usr.bin/grep/util.c 228319 2011-12-07 12:25:28Z gabor $	*/
3210389Sgabor/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
4210389Sgabor
5210389Sgabor/*-
6211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8210389Sgabor * All rights reserved.
9210389Sgabor *
10210389Sgabor * Redistribution and use in source and binary forms, with or without
11210389Sgabor * modification, are permitted provided that the following conditions
12210389Sgabor * are met:
13210389Sgabor * 1. Redistributions of source code must retain the above copyright
14210389Sgabor *    notice, this list of conditions and the following disclaimer.
15210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright
16210389Sgabor *    notice, this list of conditions and the following disclaimer in the
17210389Sgabor *    documentation and/or other materials provided with the distribution.
18210389Sgabor *
19210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22210389Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29210389Sgabor * SUCH DAMAGE.
30210389Sgabor */
31210389Sgabor
32210389Sgabor#include <sys/cdefs.h>
33210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 228319 2011-12-07 12:25:28Z gabor $");
34210389Sgabor
35210389Sgabor#include <sys/stat.h>
36210389Sgabor#include <sys/types.h>
37210389Sgabor
38210389Sgabor#include <ctype.h>
39210389Sgabor#include <err.h>
40210389Sgabor#include <errno.h>
41210389Sgabor#include <fnmatch.h>
42210389Sgabor#include <fts.h>
43210389Sgabor#include <libgen.h>
44210578Sgabor#include <stdbool.h>
45210389Sgabor#include <stdio.h>
46210389Sgabor#include <stdlib.h>
47210389Sgabor#include <string.h>
48210389Sgabor#include <unistd.h>
49210389Sgabor#include <wchar.h>
50210389Sgabor#include <wctype.h>
51210389Sgabor
52226035Sgabor#include "fastmatch.h"
53210389Sgabor#include "grep.h"
54210389Sgabor
/*
 * Counter of lines handed to enqueue() by procfile().  It is reset to 0
 * when procfile() starts a new file and when procline() prints a match.
 */
55210389Sgaborstatic int	 linesqueued;
/* Forward declaration: procline() is defined after its caller, procfile(). */
56210389Sgaborstatic int	 procline(struct str *l, int);
57210389Sgabor
58210578Sgaborbool
59210578Sgaborfile_matching(const char *fname)
60210578Sgabor{
61220421Sgabor	char *fname_base;
62210578Sgabor	bool ret;
63210578Sgabor
64210578Sgabor	ret = finclude ? false : true;
65220421Sgabor	fname_base = basename(fname);
66210578Sgabor
67210578Sgabor	for (unsigned int i = 0; i < fpatterns; ++i) {
68220421Sgabor		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
69220421Sgabor		    fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
70210578Sgabor			if (fpattern[i].mode == EXCL_PAT)
71210578Sgabor				return (false);
72210578Sgabor			else
73210578Sgabor				ret = true;
74210578Sgabor		}
75210578Sgabor	}
76210578Sgabor	return (ret);
77210578Sgabor}
78210578Sgabor
79211364Sgaborstatic inline bool
80210578Sgabordir_matching(const char *dname)
81210578Sgabor{
82210578Sgabor	bool ret;
83210578Sgabor
84210578Sgabor	ret = dinclude ? false : true;
85210578Sgabor
86210578Sgabor	for (unsigned int i = 0; i < dpatterns; ++i) {
87210578Sgabor		if (dname != NULL &&
88224938Sgabor		    fnmatch(dpattern[i].pat, dname, 0) == 0) {
89210578Sgabor			if (dpattern[i].mode == EXCL_PAT)
90210578Sgabor				return (false);
91210578Sgabor			else
92210578Sgabor				ret = true;
93210578Sgabor		}
94210578Sgabor	}
95210578Sgabor	return (ret);
96210578Sgabor}
97210578Sgabor
98210389Sgabor/*
99210389Sgabor * Processes a directory when a recursive search is performed with
100210389Sgabor * the -R option.  Each appropriate file is passed to procfile().
101210389Sgabor */
102210389Sgaborint
103210389Sgaborgrep_tree(char **argv)
104210389Sgabor{
105210389Sgabor	FTS *fts;
106210389Sgabor	FTSENT *p;
107210389Sgabor	int c, fts_flags;
108210389Sgabor	bool ok;
109210389Sgabor
110210389Sgabor	c = fts_flags = 0;
111210389Sgabor
112210389Sgabor	switch(linkbehave) {
113210389Sgabor	case LINK_EXPLICIT:
114210389Sgabor		fts_flags = FTS_COMFOLLOW;
115210389Sgabor		break;
116210389Sgabor	case LINK_SKIP:
117210389Sgabor		fts_flags = FTS_PHYSICAL;
118210389Sgabor		break;
119210389Sgabor	default:
120210389Sgabor		fts_flags = FTS_LOGICAL;
121210389Sgabor
122210389Sgabor	}
123210389Sgabor
124210389Sgabor	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
125210389Sgabor
126210389Sgabor	if (!(fts = fts_open(argv, fts_flags, NULL)))
127210430Sdelphij		err(2, "fts_open");
128210389Sgabor	while ((p = fts_read(fts)) != NULL) {
129210389Sgabor		switch (p->fts_info) {
130210389Sgabor		case FTS_DNR:
131210389Sgabor			/* FALLTHROUGH */
132210389Sgabor		case FTS_ERR:
133228319Sgabor			file_err = true;
134228097Sgabor			if(!sflag)
135228097Sgabor				warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
136210389Sgabor			break;
137210389Sgabor		case FTS_D:
138210389Sgabor			/* FALLTHROUGH */
139210389Sgabor		case FTS_DP:
140224938Sgabor			if (dexclude || dinclude)
141224938Sgabor				if (!dir_matching(p->fts_name) ||
142224938Sgabor				    !dir_matching(p->fts_path))
143224938Sgabor					fts_set(fts, p, FTS_SKIP);
144210389Sgabor			break;
145210389Sgabor		case FTS_DC:
146210389Sgabor			/* Print a warning for recursive directory loop */
147210389Sgabor			warnx("warning: %s: recursive directory loop",
148210389Sgabor				p->fts_path);
149210389Sgabor			break;
150210389Sgabor		default:
151210389Sgabor			/* Check for file exclusion/inclusion */
152210389Sgabor			ok = true;
153210578Sgabor			if (fexclude || finclude)
154210578Sgabor				ok &= file_matching(p->fts_path);
155210389Sgabor
156210389Sgabor			if (ok)
157210389Sgabor				c += procfile(p->fts_path);
158210389Sgabor			break;
159210389Sgabor		}
160210389Sgabor	}
161210389Sgabor
162210430Sdelphij	fts_close(fts);
163210389Sgabor	return (c);
164210389Sgabor}
165210389Sgabor
166210389Sgabor/*
167210389Sgabor * Opens a file and processes it.  Each file is processed line-by-line
168210389Sgabor * passing the lines to procline().
169210389Sgabor */
170210389Sgaborint
171210389Sgaborprocfile(const char *fn)
172210389Sgabor{
173210389Sgabor	struct file *f;
174210389Sgabor	struct stat sb;
175210389Sgabor	struct str ln;
176210389Sgabor	mode_t s;
177210389Sgabor	int c, t;
178210389Sgabor
179210389Sgabor	if (mflag && (mcount <= 0))
180210389Sgabor		return (0);
181210389Sgabor
182210389Sgabor	if (strcmp(fn, "-") == 0) {
183210389Sgabor		fn = label != NULL ? label : getstr(1);
184211463Sgabor		f = grep_open(NULL);
185210389Sgabor	} else {
186210389Sgabor		if (!stat(fn, &sb)) {
187210389Sgabor			/* Check if we need to process the file */
188210389Sgabor			s = sb.st_mode & S_IFMT;
189210389Sgabor			if (s == S_IFDIR && dirbehave == DIR_SKIP)
190210389Sgabor				return (0);
191210389Sgabor			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
192210389Sgabor				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
193210389Sgabor					return (0);
194210389Sgabor		}
195210389Sgabor		f = grep_open(fn);
196210389Sgabor	}
197210389Sgabor	if (f == NULL) {
198228319Sgabor		file_err = true;
199210389Sgabor		if (!sflag)
200210389Sgabor			warn("%s", fn);
201210389Sgabor		return (0);
202210389Sgabor	}
203210389Sgabor
204210389Sgabor	ln.file = grep_malloc(strlen(fn) + 1);
205210389Sgabor	strcpy(ln.file, fn);
206210389Sgabor	ln.line_no = 0;
207210389Sgabor	ln.len = 0;
208210389Sgabor	linesqueued = 0;
209210389Sgabor	tail = 0;
210210389Sgabor	ln.off = -1;
211210389Sgabor
212210389Sgabor	for (c = 0;  c == 0 || !(lflag || qflag); ) {
213210389Sgabor		ln.off += ln.len + 1;
214211463Sgabor		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
215210389Sgabor			if (ln.line_no == 0 && matchall)
216210389Sgabor				exit(0);
217210389Sgabor			else
218210389Sgabor				break;
219210389Sgabor		}
220210389Sgabor		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
221210389Sgabor			--ln.len;
222210389Sgabor		ln.line_no++;
223210389Sgabor
224210389Sgabor		/* Return if we need to skip a binary file */
225210389Sgabor		if (f->binary && binbehave == BINFILE_SKIP) {
226210389Sgabor			grep_close(f);
227210430Sdelphij			free(ln.file);
228210389Sgabor			free(f);
229210389Sgabor			return (0);
230210389Sgabor		}
231210389Sgabor		/* Process the file line-by-line */
232210389Sgabor		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
233210389Sgabor			enqueue(&ln);
234210389Sgabor			linesqueued++;
235210389Sgabor		}
236210389Sgabor		c += t;
237226273Sgabor		if (mflag && mcount <= 0)
238226035Sgabor			break;
239210389Sgabor	}
240210389Sgabor	if (Bflag > 0)
241210389Sgabor		clearqueue();
242210389Sgabor	grep_close(f);
243210389Sgabor
244210389Sgabor	if (cflag) {
245210389Sgabor		if (!hflag)
246210389Sgabor			printf("%s:", ln.file);
247210389Sgabor		printf("%u\n", c);
248210389Sgabor	}
249210461Sgabor	if (lflag && !qflag && c != 0)
250228093Sgabor		printf("%s%c", fn, nullflag ? 0 : '\n');
251210461Sgabor	if (Lflag && !qflag && c == 0)
252228093Sgabor		printf("%s%c", fn, nullflag ? 0 : '\n');
253210389Sgabor	if (c && !cflag && !lflag && !Lflag &&
254210389Sgabor	    binbehave == BINFILE_BIN && f->binary && !qflag)
255210622Sgabor		printf(getstr(8), fn);
256210389Sgabor
257210430Sdelphij	free(ln.file);
258210389Sgabor	free(f);
259210389Sgabor	return (c);
260210389Sgabor}
261210389Sgabor
/* True when the wide character x is alphanumeric or an underscore. */
262210389Sgabor#define iswword(x)	(iswalnum((x)) || (x) == L'_')
263210389Sgabor
264210389Sgabor/*
 * Processes a line comparing it with the specified patterns.  Each pattern
 * is tried against the line and the matches found are recorded (up to
 * MAX_LINE_MATCHES), which is necessary to colorize the output and for -o.
 * Matching lines, and trailing context lines, are passed to printline().
 *
 * l       - the line to examine
 * nottext - non-zero when the caller considers the file binary
 *
 * Returns c: non-zero when the line is selected (the result is inverted
 * when vflag is set), 0 otherwise.  When mflag is set, c is also
 * subtracted from mcount.  Updates the file-scope/global context state
 * (tail, prev, first, linesqueued).
 */
271220421Sgaborstatic int
272210389Sgaborprocline(struct str *l, int nottext)
273210389Sgabor{
274210389Sgabor	regmatch_t matches[MAX_LINE_MATCHES];
275210389Sgabor	regmatch_t pmatch;
276210389Sgabor	size_t st = 0;
277210389Sgabor	unsigned int i;
278210389Sgabor	int c = 0, m = 0, r = 0;
279210389Sgabor
280226035Sgabor	/* Loop to process the whole line */
281226035Sgabor	while (st <= l->len) {
	/*
	 * Range handed to the matchers for this pass: from st to the end
	 * of the line.  NOTE(review): presumably consumed as a
	 * REG_STARTEND range via eflags -- confirm where eflags is set.
	 */
282226035Sgabor		pmatch.rm_so = st;
283226035Sgabor		pmatch.rm_eo = l->len;
284210389Sgabor
285226035Sgabor		/* Loop to compare with all the patterns */
286226035Sgabor		for (i = 0; i < patterns; i++) {
287226035Sgabor			if (fg_pattern[i].pattern)
288226035Sgabor				r = fastexec(&fg_pattern[i],
289226035Sgabor				    l->dat, 1, &pmatch, eflags);
290226035Sgabor			else
291226035Sgabor				r = regexec(&r_pattern[i], l->dat, 1,
292226035Sgabor				    &pmatch, eflags);
			/* Collapse every non-zero result into REG_NOMATCH. */
293226035Sgabor			r = (r == 0) ? 0 : REG_NOMATCH;
			/*
			 * st is updated after every attempt, matching or
			 * not: to the line length when REG_NOSUB is set in
			 * cflags, otherwise to pmatch.rm_eo.
			 */
294226035Sgabor			st = (cflags & REG_NOSUB)
295226035Sgabor				? (size_t)l->len
296226035Sgabor				: (size_t)pmatch.rm_eo;
297226035Sgabor			if (r == REG_NOMATCH)
298226035Sgabor				continue;
299226035Sgabor			/* Check for full match */
300226035Sgabor			if (r == 0 && xflag)
301226035Sgabor				if (pmatch.rm_so != 0 ||
302226035Sgabor				    (size_t)pmatch.rm_eo != l->len)
303226035Sgabor					r = REG_NOMATCH;
304226035Sgabor			/* Check for whole word match */
305226035Sgabor			if (r == 0 && (wflag || fg_pattern[i].word)) {
306226035Sgabor				wint_t wbegin, wend;
307210389Sgabor
				/*
				 * Read the character just before and just
				 * after the match; a space stands in at the
				 * line boundaries.  The match is rejected if
				 * either neighbour is a word character or
				 * cannot be read.
				 * NOTE(review): sscanf() treats l->dat as a
				 * C string from that offset -- confirm the
				 * buffer is NUL-terminated.
				 */
308226035Sgabor				wbegin = wend = L' ';
309226035Sgabor				if (pmatch.rm_so != 0 &&
310226035Sgabor				    sscanf(&l->dat[pmatch.rm_so - 1],
311226035Sgabor				    "%lc", &wbegin) != 1)
312226035Sgabor					r = REG_NOMATCH;
313226035Sgabor				else if ((size_t)pmatch.rm_eo !=
314226035Sgabor				    l->len &&
315226035Sgabor				    sscanf(&l->dat[pmatch.rm_eo],
316226035Sgabor				    "%lc", &wend) != 1)
317226035Sgabor					r = REG_NOMATCH;
318226035Sgabor				else if (iswword(wbegin) ||
319226035Sgabor				    iswword(wend))
320226035Sgabor					r = REG_NOMATCH;
321210389Sgabor			}
322226035Sgabor			if (r == 0) {
				/* Count the line once, on its first recorded match. */
323226035Sgabor				if (m == 0)
324226035Sgabor					c++;
				/* Matches beyond MAX_LINE_MATCHES are not recorded. */
325226035Sgabor				if (m < MAX_LINE_MATCHES)
326226035Sgabor					matches[m++] = pmatch;
327226035Sgabor				/* matches - skip further patterns */
328226035Sgabor				if ((color == NULL && !oflag) ||
329226035Sgabor				    qflag || lflag)
330226035Sgabor					break;
331210389Sgabor			}
332226035Sgabor		}
333210389Sgabor
		/* vflag: invert the result and stop after this single pass. */
334226035Sgabor		if (vflag) {
335226035Sgabor			c = !c;
336226035Sgabor			break;
337210389Sgabor		}
338210389Sgabor
339226035Sgabor		/* One pass if we are not recording matches */
340226035Sgabor		if ((color == NULL && !oflag) || qflag || lflag)
341226035Sgabor			break;
342226035Sgabor
		/* Stop when st did not move past the start of the last range. */
343226035Sgabor		if (st == (size_t)pmatch.rm_so)
344226035Sgabor			break; 	/* No matches */
345226035Sgabor	}
346226035Sgabor
347226035Sgabor
348226035Sgabor	/* Count the matches if we have a match limit */
349226035Sgabor	if (mflag)
350226035Sgabor		mcount -= c;
351226035Sgabor
	/* A selected line of a binary file is counted but never printed. */
352210389Sgabor	if (c && binbehave == BINFILE_BIN && nottext)
353210389Sgabor		return (c); /* Binary file */
354210389Sgabor
355210389Sgabor	/* Dealing with the context */
356210479Sgabor	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
357210389Sgabor		if (c) {
			/* "--" separates this group from earlier output. */
358210389Sgabor			if (!first && !prev && !tail && Aflag)
359210389Sgabor				printf("--\n");
			/* Re-arm the trailing context counter. */
360210389Sgabor			tail = Aflag;
361210389Sgabor			if (Bflag > 0) {
362210389Sgabor				if (!first && !prev)
363210389Sgabor					printf("--\n");
				/* Flush the queued leading context lines. */
364210389Sgabor				printqueue();
365210389Sgabor			}
366210389Sgabor			linesqueued = 0;
367210389Sgabor			printline(l, ':', matches, m);
368210389Sgabor		} else {
			/* Trailing context line: '-' separator, one fewer left. */
369210389Sgabor			printline(l, '-', matches, m);
370210389Sgabor			tail--;
371210389Sgabor		}
372210389Sgabor	}
373210389Sgabor
	/* Remember whether this line was selected, for the next call. */
374210389Sgabor	if (c) {
375210389Sgabor		prev = true;
376210389Sgabor		first = false;
377210389Sgabor	} else
378210389Sgabor		prev = false;
379210389Sgabor
380210389Sgabor	return (c);
381210389Sgabor}
382210389Sgabor
/*
 * Safe malloc() for internal use.
 *
 * Returns a non-NULL pointer to at least `size' bytes, or terminates the
 * program through err(2, "malloc") when the allocation fails.  A request
 * for zero bytes is rounded up to one byte: malloc(0) may legitimately
 * return NULL, which must not be mistaken for an out-of-memory condition.
 */
void *
grep_malloc(size_t size)
{
	void *ptr;

	if ((ptr = malloc(size != 0 ? size : 1)) == NULL)
		err(2, "malloc");
	return (ptr);
}
395210389Sgabor
/*
 * Safe calloc() for internal use.
 *
 * Returns a non-NULL pointer to zero-filled storage for `nmemb' objects
 * of `size' bytes each, or terminates the program through
 * err(2, "calloc") when the allocation fails.  A request whose total
 * size is zero is turned into a one-byte allocation: calloc() may
 * legitimately return NULL for it, which must not be reported as an
 * error.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *ptr;

	if (nmemb == 0 || size == 0)
		nmemb = size = 1;
	if ((ptr = calloc(nmemb, size)) == NULL)
		err(2, "calloc");
	return (ptr);
}
408210389Sgabor
/*
 * Safe realloc() for internal use.
 *
 * Resizes the allocation at `ptr' (or allocates a new one when ptr is
 * NULL) to `size' bytes and returns the resulting non-NULL pointer, or
 * terminates the program through err(2, "realloc") when that fails.
 * A size of zero is rounded up to one byte: realloc(ptr, 0) may free
 * the block and return NULL, which would otherwise be reported as an
 * allocation failure.
 */
void *
grep_realloc(void *ptr, size_t size)
{

	if ((ptr = realloc(ptr, size != 0 ? size : 1)) == NULL)
		err(2, "realloc");
	return (ptr);
}
420210389Sgabor
/*
 * Safe strdup() for internal use.
 *
 * Returns a newly allocated copy of `str', or terminates the program
 * through err(2, "strdup") when strdup() fails.
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
433210578Sgabor
434210578Sgabor/*
435210389Sgabor * Prints a matching line according to the command line options.
436210389Sgabor */
437210389Sgaborvoid
438210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m)
439210389Sgabor{
440210389Sgabor	size_t a = 0;
441210389Sgabor	int i, n = 0;
442210389Sgabor
443210389Sgabor	if (!hflag) {
444228093Sgabor		if (!nullflag) {
445210389Sgabor			fputs(line->file, stdout);
446228093Sgabor			++n;
447228093Sgabor		} else {
448210389Sgabor			printf("%s", line->file);
449210389Sgabor			putchar(0);
450210389Sgabor		}
451210389Sgabor	}
452210389Sgabor	if (nflag) {
453210389Sgabor		if (n > 0)
454210389Sgabor			putchar(sep);
455210389Sgabor		printf("%d", line->line_no);
456210389Sgabor		++n;
457210389Sgabor	}
458210389Sgabor	if (bflag) {
459210389Sgabor		if (n > 0)
460210389Sgabor			putchar(sep);
461210389Sgabor		printf("%lld", (long long)line->off);
462210389Sgabor		++n;
463210389Sgabor	}
464210389Sgabor	if (n)
465210389Sgabor		putchar(sep);
466210389Sgabor	/* --color and -o */
467210389Sgabor	if ((oflag || color) && m > 0) {
468210389Sgabor		for (i = 0; i < m; i++) {
469210389Sgabor			if (!oflag)
470210389Sgabor				fwrite(line->dat + a, matches[i].rm_so - a, 1,
471210389Sgabor				    stdout);
472210389Sgabor			if (color)
473210389Sgabor				fprintf(stdout, "\33[%sm\33[K", color);
474210389Sgabor
475210389Sgabor				fwrite(line->dat + matches[i].rm_so,
476210389Sgabor				    matches[i].rm_eo - matches[i].rm_so, 1,
477210389Sgabor				    stdout);
478210389Sgabor			if (color)
479210389Sgabor				fprintf(stdout, "\33[m\33[K");
480210389Sgabor			a = matches[i].rm_eo;
481210389Sgabor			if (oflag)
482210389Sgabor				putchar('\n');
483210389Sgabor		}
484210389Sgabor		if (!oflag) {
485210389Sgabor			if (line->len - a > 0)
486210389Sgabor				fwrite(line->dat + a, line->len - a, 1, stdout);
487210389Sgabor			putchar('\n');
488210389Sgabor		}
489210389Sgabor	} else {
490210389Sgabor		fwrite(line->dat, line->len, 1, stdout);
491210389Sgabor		putchar('\n');
492210389Sgabor	}
493210389Sgabor}
494