util.c revision 220421
1210389Sgabor/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
2210389Sgabor
3210389Sgabor/*-
4211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6210389Sgabor * All rights reserved.
7210389Sgabor *
8210389Sgabor * Redistribution and use in source and binary forms, with or without
9210389Sgabor * modification, are permitted provided that the following conditions
10210389Sgabor * are met:
11210389Sgabor * 1. Redistributions of source code must retain the above copyright
12210389Sgabor *    notice, this list of conditions and the following disclaimer.
13210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14210389Sgabor *    notice, this list of conditions and the following disclaimer in the
15210389Sgabor *    documentation and/or other materials provided with the distribution.
16210389Sgabor *
17210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20210389Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27210389Sgabor * SUCH DAMAGE.
28210389Sgabor */
29210389Sgabor
30210389Sgabor#include <sys/cdefs.h>
31210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 220421 2011-04-07 13:01:03Z gabor $");
32210389Sgabor
33210389Sgabor#include <sys/stat.h>
34210389Sgabor#include <sys/types.h>
35210389Sgabor
36210389Sgabor#include <ctype.h>
37210389Sgabor#include <err.h>
38210389Sgabor#include <errno.h>
39210389Sgabor#include <fnmatch.h>
40210389Sgabor#include <fts.h>
41210389Sgabor#include <libgen.h>
42210578Sgabor#include <stdbool.h>
43210389Sgabor#include <stdio.h>
44210389Sgabor#include <stdlib.h>
45210389Sgabor#include <string.h>
46210389Sgabor#include <unistd.h>
47210389Sgabor#include <wchar.h>
48210389Sgabor#include <wctype.h>
49210389Sgabor
50210389Sgabor#include "grep.h"
51210389Sgabor
/* Forward declaration: defined further down, called by procfile() per line. */
static int	 procline(struct str *l, int nottext);
/* Number of lines handed to enqueue() since the counter was last reset. */
static int	 linesqueued;
54210389Sgabor
55210578Sgaborbool
56210578Sgaborfile_matching(const char *fname)
57210578Sgabor{
58220421Sgabor	char *fname_base;
59210578Sgabor	bool ret;
60210578Sgabor
61210578Sgabor	ret = finclude ? false : true;
62220421Sgabor	fname_base = basename(fname);
63210578Sgabor
64210578Sgabor	for (unsigned int i = 0; i < fpatterns; ++i) {
65220421Sgabor		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
66220421Sgabor		    fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
67210578Sgabor			if (fpattern[i].mode == EXCL_PAT)
68210578Sgabor				return (false);
69210578Sgabor			else
70210578Sgabor				ret = true;
71210578Sgabor		}
72210578Sgabor	}
73210578Sgabor	return (ret);
74210578Sgabor}
75210578Sgabor
76211364Sgaborstatic inline bool
77210578Sgabordir_matching(const char *dname)
78210578Sgabor{
79210578Sgabor	bool ret;
80210578Sgabor
81210578Sgabor	ret = dinclude ? false : true;
82210578Sgabor
83210578Sgabor	for (unsigned int i = 0; i < dpatterns; ++i) {
84210578Sgabor		if (dname != NULL &&
85210578Sgabor		    fnmatch(dname, dpattern[i].pat, 0) == 0) {
86210578Sgabor			if (dpattern[i].mode == EXCL_PAT)
87210578Sgabor				return (false);
88210578Sgabor			else
89210578Sgabor				ret = true;
90210578Sgabor		}
91210578Sgabor	}
92210578Sgabor	return (ret);
93210578Sgabor}
94210578Sgabor
95210389Sgabor/*
96210389Sgabor * Processes a directory when a recursive search is performed with
97210389Sgabor * the -R option.  Each appropriate file is passed to procfile().
98210389Sgabor */
99210389Sgaborint
100210389Sgaborgrep_tree(char **argv)
101210389Sgabor{
102210389Sgabor	FTS *fts;
103210389Sgabor	FTSENT *p;
104210430Sdelphij	char *d, *dir = NULL;
105210389Sgabor	int c, fts_flags;
106210389Sgabor	bool ok;
107210389Sgabor
108210389Sgabor	c = fts_flags = 0;
109210389Sgabor
110210389Sgabor	switch(linkbehave) {
111210389Sgabor	case LINK_EXPLICIT:
112210389Sgabor		fts_flags = FTS_COMFOLLOW;
113210389Sgabor		break;
114210389Sgabor	case LINK_SKIP:
115210389Sgabor		fts_flags = FTS_PHYSICAL;
116210389Sgabor		break;
117210389Sgabor	default:
118210389Sgabor		fts_flags = FTS_LOGICAL;
119210389Sgabor
120210389Sgabor	}
121210389Sgabor
122210389Sgabor	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
123210389Sgabor
124210389Sgabor	if (!(fts = fts_open(argv, fts_flags, NULL)))
125210430Sdelphij		err(2, "fts_open");
126210389Sgabor	while ((p = fts_read(fts)) != NULL) {
127210389Sgabor		switch (p->fts_info) {
128210389Sgabor		case FTS_DNR:
129210389Sgabor			/* FALLTHROUGH */
130210389Sgabor		case FTS_ERR:
131210389Sgabor			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
132210389Sgabor			break;
133210389Sgabor		case FTS_D:
134210389Sgabor			/* FALLTHROUGH */
135210389Sgabor		case FTS_DP:
136210389Sgabor			break;
137210389Sgabor		case FTS_DC:
138210389Sgabor			/* Print a warning for recursive directory loop */
139210389Sgabor			warnx("warning: %s: recursive directory loop",
140210389Sgabor				p->fts_path);
141210389Sgabor			break;
142210389Sgabor		default:
143210389Sgabor			/* Check for file exclusion/inclusion */
144210389Sgabor			ok = true;
145210578Sgabor			if (dexclude || dinclude) {
146210430Sdelphij				if ((d = strrchr(p->fts_path, '/')) != NULL) {
147210430Sdelphij					dir = grep_malloc(sizeof(char) *
148210430Sdelphij					    (d - p->fts_path + 1));
149211364Sgabor					memcpy(dir, p->fts_path,
150211364Sgabor					    d - p->fts_path);
151211364Sgabor					dir[d - p->fts_path] = '\0';
152210430Sdelphij				}
153210578Sgabor				ok = dir_matching(dir);
154210430Sdelphij				free(dir);
155210430Sdelphij				dir = NULL;
156210389Sgabor			}
157210578Sgabor			if (fexclude || finclude)
158210578Sgabor				ok &= file_matching(p->fts_path);
159210389Sgabor
160210389Sgabor			if (ok)
161210389Sgabor				c += procfile(p->fts_path);
162210389Sgabor			break;
163210389Sgabor		}
164210389Sgabor	}
165210389Sgabor
166210430Sdelphij	fts_close(fts);
167210389Sgabor	return (c);
168210389Sgabor}
169210389Sgabor
170210389Sgabor/*
171210389Sgabor * Opens a file and processes it.  Each file is processed line-by-line
172210389Sgabor * passing the lines to procline().
173210389Sgabor */
174210389Sgaborint
175210389Sgaborprocfile(const char *fn)
176210389Sgabor{
177210389Sgabor	struct file *f;
178210389Sgabor	struct stat sb;
179210389Sgabor	struct str ln;
180210389Sgabor	mode_t s;
181210389Sgabor	int c, t;
182210389Sgabor
183210389Sgabor	if (mflag && (mcount <= 0))
184210389Sgabor		return (0);
185210389Sgabor
186210389Sgabor	if (strcmp(fn, "-") == 0) {
187210389Sgabor		fn = label != NULL ? label : getstr(1);
188211463Sgabor		f = grep_open(NULL);
189210389Sgabor	} else {
190210389Sgabor		if (!stat(fn, &sb)) {
191210389Sgabor			/* Check if we need to process the file */
192210389Sgabor			s = sb.st_mode & S_IFMT;
193210389Sgabor			if (s == S_IFDIR && dirbehave == DIR_SKIP)
194210389Sgabor				return (0);
195210389Sgabor			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
196210389Sgabor				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
197210389Sgabor					return (0);
198210389Sgabor		}
199210389Sgabor		f = grep_open(fn);
200210389Sgabor	}
201210389Sgabor	if (f == NULL) {
202210389Sgabor		if (!sflag)
203210389Sgabor			warn("%s", fn);
204210389Sgabor		if (errno == ENOENT)
205210389Sgabor			notfound = true;
206210389Sgabor		return (0);
207210389Sgabor	}
208210389Sgabor
209210389Sgabor	ln.file = grep_malloc(strlen(fn) + 1);
210210389Sgabor	strcpy(ln.file, fn);
211210389Sgabor	ln.line_no = 0;
212210389Sgabor	ln.len = 0;
213210389Sgabor	linesqueued = 0;
214210389Sgabor	tail = 0;
215210389Sgabor	ln.off = -1;
216210389Sgabor
217210389Sgabor	for (c = 0;  c == 0 || !(lflag || qflag); ) {
218210389Sgabor		ln.off += ln.len + 1;
219211463Sgabor		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
220210389Sgabor			if (ln.line_no == 0 && matchall)
221210389Sgabor				exit(0);
222210389Sgabor			else
223210389Sgabor				break;
224210389Sgabor		}
225210389Sgabor		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
226210389Sgabor			--ln.len;
227210389Sgabor		ln.line_no++;
228210389Sgabor
229210389Sgabor		/* Return if we need to skip a binary file */
230210389Sgabor		if (f->binary && binbehave == BINFILE_SKIP) {
231210389Sgabor			grep_close(f);
232210430Sdelphij			free(ln.file);
233210389Sgabor			free(f);
234210389Sgabor			return (0);
235210389Sgabor		}
236210389Sgabor		/* Process the file line-by-line */
237210389Sgabor		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
238210389Sgabor			enqueue(&ln);
239210389Sgabor			linesqueued++;
240210389Sgabor		}
241210389Sgabor		c += t;
242210389Sgabor
243210389Sgabor		/* Count the matches if we have a match limit */
244210389Sgabor		if (mflag) {
245210389Sgabor			mcount -= t;
246210389Sgabor			if (mcount <= 0)
247210389Sgabor				break;
248210389Sgabor		}
249210389Sgabor	}
250210389Sgabor	if (Bflag > 0)
251210389Sgabor		clearqueue();
252210389Sgabor	grep_close(f);
253210389Sgabor
254210389Sgabor	if (cflag) {
255210389Sgabor		if (!hflag)
256210389Sgabor			printf("%s:", ln.file);
257210389Sgabor		printf("%u\n", c);
258210389Sgabor	}
259210461Sgabor	if (lflag && !qflag && c != 0)
260210389Sgabor		printf("%s\n", fn);
261210461Sgabor	if (Lflag && !qflag && c == 0)
262210389Sgabor		printf("%s\n", fn);
263210389Sgabor	if (c && !cflag && !lflag && !Lflag &&
264210389Sgabor	    binbehave == BINFILE_BIN && f->binary && !qflag)
265210622Sgabor		printf(getstr(8), fn);
266210389Sgabor
267210430Sdelphij	free(ln.file);
268210389Sgabor	free(f);
269210389Sgabor	return (c);
270210389Sgabor}
271210389Sgabor
272210389Sgabor#define iswword(x)	(iswalnum((x)) || (x) == L'_')
273210389Sgabor
274210389Sgabor/*
275210389Sgabor * Processes a line comparing it with the specified patterns.  Each pattern
276210389Sgabor * is looped to be compared along with the full string, saving each and every
277210389Sgabor * match, which is necessary to colorize the output and to count the
278210389Sgabor * matches.  The matching lines are passed to printline() to display the
279210389Sgabor * appropriate output.
280210389Sgabor */
281220421Sgaborstatic int
282210389Sgaborprocline(struct str *l, int nottext)
283210389Sgabor{
284210389Sgabor	regmatch_t matches[MAX_LINE_MATCHES];
285210389Sgabor	regmatch_t pmatch;
286210389Sgabor	size_t st = 0;
287210389Sgabor	unsigned int i;
288210389Sgabor	int c = 0, m = 0, r = 0;
289210389Sgabor
290210389Sgabor	if (!matchall) {
291210389Sgabor		/* Loop to process the whole line */
292210389Sgabor		while (st <= l->len) {
293210389Sgabor			pmatch.rm_so = st;
294210389Sgabor			pmatch.rm_eo = l->len;
295210389Sgabor
296210389Sgabor			/* Loop to compare with all the patterns */
297210389Sgabor			for (i = 0; i < patterns; i++) {
298210389Sgabor/*
299210389Sgabor * XXX: grep_search() is a workaround for speed up and should be
300210389Sgabor * removed in the future.  See fastgrep.c.
301210389Sgabor */
302210389Sgabor				if (fg_pattern[i].pattern) {
303210389Sgabor					r = grep_search(&fg_pattern[i],
304210389Sgabor					    (unsigned char *)l->dat,
305210389Sgabor					    l->len, &pmatch);
306210389Sgabor					r = (r == 0) ? 0 : REG_NOMATCH;
307210389Sgabor					st = pmatch.rm_eo;
308210389Sgabor				} else {
309210389Sgabor					r = regexec(&r_pattern[i], l->dat, 1,
310210389Sgabor					    &pmatch, eflags);
311210389Sgabor					r = (r == 0) ? 0 : REG_NOMATCH;
312210389Sgabor					st = pmatch.rm_eo;
313210389Sgabor				}
314210389Sgabor				if (r == REG_NOMATCH)
315210389Sgabor					continue;
316210389Sgabor				/* Check for full match */
317210389Sgabor				if (r == 0 && xflag)
318210389Sgabor					if (pmatch.rm_so != 0 ||
319210389Sgabor					    (size_t)pmatch.rm_eo != l->len)
320210389Sgabor						r = REG_NOMATCH;
321210389Sgabor				/* Check for whole word match */
322220421Sgabor				if (r == 0 && fg_pattern[i].word &&
323220421Sgabor				    pmatch.rm_so != 0) {
324211364Sgabor					wint_t wbegin, wend;
325210389Sgabor
326211364Sgabor					wbegin = wend = L' ';
327211364Sgabor					if (pmatch.rm_so != 0 &&
328211364Sgabor					    sscanf(&l->dat[pmatch.rm_so - 1],
329211364Sgabor					    "%lc", &wbegin) != 1)
330210389Sgabor						r = REG_NOMATCH;
331211364Sgabor					else if ((size_t)pmatch.rm_eo != l->len &&
332211364Sgabor					    sscanf(&l->dat[pmatch.rm_eo],
333211364Sgabor					    "%lc", &wend) != 1)
334211364Sgabor						r = REG_NOMATCH;
335211364Sgabor					else if (iswword(wbegin) || iswword(wend))
336211364Sgabor						r = REG_NOMATCH;
337210389Sgabor				}
338210389Sgabor				if (r == 0) {
339210389Sgabor					if (m == 0)
340210389Sgabor						c++;
341210389Sgabor					if (m < MAX_LINE_MATCHES)
342210389Sgabor						matches[m++] = pmatch;
343210389Sgabor					/* matches - skip further patterns */
344210461Sgabor					if ((color != NULL && !oflag) || qflag || lflag)
345210461Sgabor						break;
346210389Sgabor				}
347210389Sgabor			}
348210389Sgabor
349210389Sgabor			if (vflag) {
350210389Sgabor				c = !c;
351210389Sgabor				break;
352210389Sgabor			}
353210389Sgabor			/* One pass if we are not recording matches */
354210461Sgabor			if ((color != NULL && !oflag) || qflag || lflag)
355210389Sgabor				break;
356210389Sgabor
357210389Sgabor			if (st == (size_t)pmatch.rm_so)
358210389Sgabor				break; 	/* No matches */
359210389Sgabor		}
360210389Sgabor	} else
361210389Sgabor		c = !vflag;
362210389Sgabor
363210389Sgabor	if (c && binbehave == BINFILE_BIN && nottext)
364210389Sgabor		return (c); /* Binary file */
365210389Sgabor
366210389Sgabor	/* Dealing with the context */
367210479Sgabor	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
368210389Sgabor		if (c) {
369210389Sgabor			if (!first && !prev && !tail && Aflag)
370210389Sgabor				printf("--\n");
371210389Sgabor			tail = Aflag;
372210389Sgabor			if (Bflag > 0) {
373210389Sgabor				if (!first && !prev)
374210389Sgabor					printf("--\n");
375210389Sgabor				printqueue();
376210389Sgabor			}
377210389Sgabor			linesqueued = 0;
378210389Sgabor			printline(l, ':', matches, m);
379210389Sgabor		} else {
380210389Sgabor			printline(l, '-', matches, m);
381210389Sgabor			tail--;
382210389Sgabor		}
383210389Sgabor	}
384210389Sgabor
385210389Sgabor	if (c) {
386210389Sgabor		prev = true;
387210389Sgabor		first = false;
388210389Sgabor	} else
389210389Sgabor		prev = false;
390210389Sgabor
391210389Sgabor	return (c);
392210389Sgabor}
393210389Sgabor
/*
 * malloc() wrapper for internal use: a NULL result from malloc() ends the
 * program through err(2, "malloc"), so callers never see NULL.
 */
void *
grep_malloc(size_t size)
{
	void *const mem = malloc(size);

	if (!mem)
		err(2, "malloc");
	return (mem);
}
406210389Sgabor
/*
 * calloc() wrapper for internal use: a NULL result from calloc() ends the
 * program through err(2, "calloc"), so callers never see NULL.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *const mem = calloc(nmemb, size);

	if (!mem)
		err(2, "calloc");
	return (mem);
}
419210389Sgabor
/*
 * realloc() wrapper for internal use: a NULL result from realloc() ends
 * the program through err(2, "realloc"), so callers never see NULL.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *const resized = realloc(ptr, size);

	if (!resized)
		err(2, "realloc");
	return (resized);
}
431210389Sgabor
/*
 * strdup() wrapper for internal use: a NULL result from strdup() ends the
 * program through err(2, "strdup"), so callers never see NULL.  The
 * returned copy is released with free().
 */
char *
grep_strdup(const char *str)
{
	char *const copy = strdup(str);

	if (!copy)
		err(2, "strdup");
	return (copy);
}
444210578Sgabor
445210578Sgabor/*
446210389Sgabor * Prints a matching line according to the command line options.
447210389Sgabor */
448210389Sgaborvoid
449210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m)
450210389Sgabor{
451210389Sgabor	size_t a = 0;
452210389Sgabor	int i, n = 0;
453210389Sgabor
454210389Sgabor	if (!hflag) {
455210389Sgabor		if (nullflag == 0)
456210389Sgabor			fputs(line->file, stdout);
457210389Sgabor		else {
458210389Sgabor			printf("%s", line->file);
459210389Sgabor			putchar(0);
460210389Sgabor		}
461210389Sgabor		++n;
462210389Sgabor	}
463210389Sgabor	if (nflag) {
464210389Sgabor		if (n > 0)
465210389Sgabor			putchar(sep);
466210389Sgabor		printf("%d", line->line_no);
467210389Sgabor		++n;
468210389Sgabor	}
469210389Sgabor	if (bflag) {
470210389Sgabor		if (n > 0)
471210389Sgabor			putchar(sep);
472210389Sgabor		printf("%lld", (long long)line->off);
473210389Sgabor		++n;
474210389Sgabor	}
475210389Sgabor	if (n)
476210389Sgabor		putchar(sep);
477210389Sgabor	/* --color and -o */
478210389Sgabor	if ((oflag || color) && m > 0) {
479210389Sgabor		for (i = 0; i < m; i++) {
480210389Sgabor			if (!oflag)
481210389Sgabor				fwrite(line->dat + a, matches[i].rm_so - a, 1,
482210389Sgabor				    stdout);
483210389Sgabor			if (color)
484210389Sgabor				fprintf(stdout, "\33[%sm\33[K", color);
485210389Sgabor
486210389Sgabor				fwrite(line->dat + matches[i].rm_so,
487210389Sgabor				    matches[i].rm_eo - matches[i].rm_so, 1,
488210389Sgabor				    stdout);
489210389Sgabor			if (color)
490210389Sgabor				fprintf(stdout, "\33[m\33[K");
491210389Sgabor			a = matches[i].rm_eo;
492210389Sgabor			if (oflag)
493210389Sgabor				putchar('\n');
494210389Sgabor		}
495210389Sgabor		if (!oflag) {
496210389Sgabor			if (line->len - a > 0)
497210389Sgabor				fwrite(line->dat + a, line->len - a, 1, stdout);
498210389Sgabor			putchar('\n');
499210389Sgabor		}
500210389Sgabor	} else {
501210389Sgabor		fwrite(line->dat, line->len, 1, stdout);
502210389Sgabor		putchar('\n');
503210389Sgabor	}
504210389Sgabor}
505