util.c revision 210622
1210389Sgabor/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
2210389Sgabor
3210389Sgabor/*-
4210389Sgabor * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6210389Sgabor * All rights reserved.
7210389Sgabor *
8210389Sgabor * Redistribution and use in source and binary forms, with or without
9210389Sgabor * modification, are permitted provided that the following conditions
10210389Sgabor * are met:
11210389Sgabor * 1. Redistributions of source code must retain the above copyright
12210389Sgabor *    notice, this list of conditions and the following disclaimer.
13210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14210389Sgabor *    notice, this list of conditions and the following disclaimer in the
15210389Sgabor *    documentation and/or other materials provided with the distribution.
16210389Sgabor *
17210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20210389Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27210389Sgabor * SUCH DAMAGE.
28210389Sgabor */
29210389Sgabor
30210389Sgabor#include <sys/cdefs.h>
31210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 210622 2010-07-29 18:02:57Z gabor $");
32210389Sgabor
33210389Sgabor#include <sys/stat.h>
34210389Sgabor#include <sys/types.h>
35210389Sgabor
36210389Sgabor#include <ctype.h>
37210389Sgabor#include <err.h>
38210389Sgabor#include <errno.h>
39210389Sgabor#include <fnmatch.h>
40210389Sgabor#include <fts.h>
41210389Sgabor#include <libgen.h>
42210578Sgabor#include <stdbool.h>
43210389Sgabor#include <stdio.h>
44210389Sgabor#include <stdlib.h>
45210389Sgabor#include <string.h>
46210389Sgabor#include <unistd.h>
47210389Sgabor#include <wchar.h>
48210389Sgabor#include <wctype.h>
49210389Sgabor
50210389Sgabor#include "grep.h"
51210389Sgabor
52210389Sgaborstatic int	 linesqueued;
53210389Sgaborstatic int	 procline(struct str *l, int);
54210389Sgabor
55210578Sgaborbool
56210578Sgaborfile_matching(const char *fname)
57210578Sgabor{
58210578Sgabor	bool ret;
59210578Sgabor
60210578Sgabor	ret = finclude ? false : true;
61210578Sgabor
62210578Sgabor	for (unsigned int i = 0; i < fpatterns; ++i) {
63210578Sgabor		if (fnmatch(fpattern[i].pat,
64210578Sgabor		    fname, 0) == 0 || fnmatch(fpattern[i].pat,
65210578Sgabor		    basename(fname), 0) == 0) {
66210578Sgabor			if (fpattern[i].mode == EXCL_PAT)
67210578Sgabor				return (false);
68210578Sgabor			else
69210578Sgabor				ret = true;
70210578Sgabor		}
71210578Sgabor	}
72210578Sgabor	return (ret);
73210578Sgabor}
74210578Sgabor
75210578Sgaborbool
76210578Sgabordir_matching(const char *dname)
77210578Sgabor{
78210578Sgabor	bool ret;
79210578Sgabor
80210578Sgabor	ret = dinclude ? false : true;
81210578Sgabor
82210578Sgabor	for (unsigned int i = 0; i < dpatterns; ++i) {
83210578Sgabor		if (dname != NULL &&
84210578Sgabor		    fnmatch(dname, dpattern[i].pat, 0) == 0) {
85210578Sgabor			if (dpattern[i].mode == EXCL_PAT)
86210578Sgabor				return (false);
87210578Sgabor			else
88210578Sgabor				ret = true;
89210578Sgabor		}
90210578Sgabor	}
91210578Sgabor	return (ret);
92210578Sgabor}
93210578Sgabor
94210389Sgabor/*
95210389Sgabor * Processes a directory when a recursive search is performed with
96210389Sgabor * the -R option.  Each appropriate file is passed to procfile().
97210389Sgabor */
98210389Sgaborint
99210389Sgaborgrep_tree(char **argv)
100210389Sgabor{
101210389Sgabor	FTS *fts;
102210389Sgabor	FTSENT *p;
103210430Sdelphij	char *d, *dir = NULL;
104210389Sgabor	int c, fts_flags;
105210389Sgabor	bool ok;
106210389Sgabor
107210389Sgabor	c = fts_flags = 0;
108210389Sgabor
109210389Sgabor	switch(linkbehave) {
110210389Sgabor	case LINK_EXPLICIT:
111210389Sgabor		fts_flags = FTS_COMFOLLOW;
112210389Sgabor		break;
113210389Sgabor	case LINK_SKIP:
114210389Sgabor		fts_flags = FTS_PHYSICAL;
115210389Sgabor		break;
116210389Sgabor	default:
117210389Sgabor		fts_flags = FTS_LOGICAL;
118210389Sgabor
119210389Sgabor	}
120210389Sgabor
121210389Sgabor	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
122210389Sgabor
123210389Sgabor	if (!(fts = fts_open(argv, fts_flags, NULL)))
124210430Sdelphij		err(2, "fts_open");
125210389Sgabor	while ((p = fts_read(fts)) != NULL) {
126210389Sgabor		switch (p->fts_info) {
127210389Sgabor		case FTS_DNR:
128210389Sgabor			/* FALLTHROUGH */
129210389Sgabor		case FTS_ERR:
130210389Sgabor			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
131210389Sgabor			break;
132210389Sgabor		case FTS_D:
133210389Sgabor			/* FALLTHROUGH */
134210389Sgabor		case FTS_DP:
135210389Sgabor			break;
136210389Sgabor		case FTS_DC:
137210389Sgabor			/* Print a warning for recursive directory loop */
138210389Sgabor			warnx("warning: %s: recursive directory loop",
139210389Sgabor				p->fts_path);
140210389Sgabor			break;
141210389Sgabor		default:
142210389Sgabor			/* Check for file exclusion/inclusion */
143210389Sgabor			ok = true;
144210578Sgabor			if (dexclude || dinclude) {
145210430Sdelphij				if ((d = strrchr(p->fts_path, '/')) != NULL) {
146210430Sdelphij					dir = grep_malloc(sizeof(char) *
147210430Sdelphij					    (d - p->fts_path + 2));
148210430Sdelphij					strlcpy(dir, p->fts_path,
149210430Sdelphij					    (d - p->fts_path + 1));
150210430Sdelphij				}
151210578Sgabor				ok = dir_matching(dir);
152210430Sdelphij				free(dir);
153210430Sdelphij				dir = NULL;
154210389Sgabor			}
155210578Sgabor			if (fexclude || finclude)
156210578Sgabor				ok &= file_matching(p->fts_path);
157210389Sgabor
158210389Sgabor			if (ok)
159210389Sgabor				c += procfile(p->fts_path);
160210389Sgabor			break;
161210389Sgabor		}
162210389Sgabor	}
163210389Sgabor
164210430Sdelphij	fts_close(fts);
165210389Sgabor	return (c);
166210389Sgabor}
167210389Sgabor
168210389Sgabor/*
169210389Sgabor * Opens a file and processes it.  Each file is processed line-by-line
170210389Sgabor * passing the lines to procline().
171210389Sgabor */
172210389Sgaborint
173210389Sgaborprocfile(const char *fn)
174210389Sgabor{
175210389Sgabor	struct file *f;
176210389Sgabor	struct stat sb;
177210389Sgabor	struct str ln;
178210389Sgabor	mode_t s;
179210389Sgabor	int c, t;
180210389Sgabor
181210389Sgabor	if (mflag && (mcount <= 0))
182210389Sgabor		return (0);
183210389Sgabor
184210389Sgabor	if (strcmp(fn, "-") == 0) {
185210389Sgabor		fn = label != NULL ? label : getstr(1);
186210389Sgabor		f = grep_stdin_open();
187210389Sgabor	} else {
188210389Sgabor		if (!stat(fn, &sb)) {
189210389Sgabor			/* Check if we need to process the file */
190210389Sgabor			s = sb.st_mode & S_IFMT;
191210389Sgabor			if (s == S_IFDIR && dirbehave == DIR_SKIP)
192210389Sgabor				return (0);
193210389Sgabor			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
194210389Sgabor				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
195210389Sgabor					return (0);
196210389Sgabor		}
197210389Sgabor		f = grep_open(fn);
198210389Sgabor	}
199210389Sgabor	if (f == NULL) {
200210389Sgabor		if (!sflag)
201210389Sgabor			warn("%s", fn);
202210389Sgabor		if (errno == ENOENT)
203210389Sgabor			notfound = true;
204210389Sgabor		return (0);
205210389Sgabor	}
206210389Sgabor
207210389Sgabor	ln.file = grep_malloc(strlen(fn) + 1);
208210389Sgabor	strcpy(ln.file, fn);
209210389Sgabor	ln.line_no = 0;
210210389Sgabor	ln.len = 0;
211210389Sgabor	linesqueued = 0;
212210389Sgabor	tail = 0;
213210389Sgabor	ln.off = -1;
214210389Sgabor
215210389Sgabor	for (c = 0;  c == 0 || !(lflag || qflag); ) {
216210389Sgabor		ln.off += ln.len + 1;
217210389Sgabor		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) {
218210389Sgabor			if (ln.line_no == 0 && matchall)
219210389Sgabor				exit(0);
220210389Sgabor			else
221210389Sgabor				break;
222210389Sgabor		}
223210389Sgabor		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
224210389Sgabor			--ln.len;
225210389Sgabor		ln.line_no++;
226210389Sgabor
227210389Sgabor		/* Return if we need to skip a binary file */
228210389Sgabor		if (f->binary && binbehave == BINFILE_SKIP) {
229210389Sgabor			grep_close(f);
230210430Sdelphij			free(ln.file);
231210389Sgabor			free(f);
232210389Sgabor			return (0);
233210389Sgabor		}
234210389Sgabor		/* Process the file line-by-line */
235210389Sgabor		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
236210389Sgabor			enqueue(&ln);
237210389Sgabor			linesqueued++;
238210389Sgabor		}
239210389Sgabor		c += t;
240210389Sgabor
241210389Sgabor		/* Count the matches if we have a match limit */
242210389Sgabor		if (mflag) {
243210389Sgabor			mcount -= t;
244210389Sgabor			if (mcount <= 0)
245210389Sgabor				break;
246210389Sgabor		}
247210389Sgabor	}
248210389Sgabor	if (Bflag > 0)
249210389Sgabor		clearqueue();
250210389Sgabor	grep_close(f);
251210389Sgabor
252210389Sgabor	if (cflag) {
253210389Sgabor		if (!hflag)
254210389Sgabor			printf("%s:", ln.file);
255210389Sgabor		printf("%u\n", c);
256210389Sgabor	}
257210461Sgabor	if (lflag && !qflag && c != 0)
258210389Sgabor		printf("%s\n", fn);
259210461Sgabor	if (Lflag && !qflag && c == 0)
260210389Sgabor		printf("%s\n", fn);
261210389Sgabor	if (c && !cflag && !lflag && !Lflag &&
262210389Sgabor	    binbehave == BINFILE_BIN && f->binary && !qflag)
263210622Sgabor		printf(getstr(8), fn);
264210389Sgabor
265210430Sdelphij	free(ln.file);
266210389Sgabor	free(f);
267210389Sgabor	return (c);
268210389Sgabor}
269210389Sgabor
270210389Sgabor#define iswword(x)	(iswalnum((x)) || (x) == L'_')
271210389Sgabor
272210389Sgabor/*
273210389Sgabor * Processes a line comparing it with the specified patterns.  Each pattern
274210389Sgabor * is looped to be compared along with the full string, saving each and every
275210389Sgabor * match, which is necessary to colorize the output and to count the
276210389Sgabor * matches.  The matching lines are passed to printline() to display the
277210389Sgabor * appropriate output.
278210389Sgabor */
279210389Sgaborstatic int
280210389Sgaborprocline(struct str *l, int nottext)
281210389Sgabor{
282210389Sgabor	regmatch_t matches[MAX_LINE_MATCHES];
283210389Sgabor	regmatch_t pmatch;
284210389Sgabor	size_t st = 0;
285210389Sgabor	unsigned int i;
286210389Sgabor	int c = 0, m = 0, r = 0;
287210389Sgabor
288210389Sgabor	if (!matchall) {
289210389Sgabor		/* Loop to process the whole line */
290210389Sgabor		while (st <= l->len) {
291210389Sgabor			pmatch.rm_so = st;
292210389Sgabor			pmatch.rm_eo = l->len;
293210389Sgabor
294210389Sgabor			/* Loop to compare with all the patterns */
295210389Sgabor			for (i = 0; i < patterns; i++) {
296210389Sgabor/*
297210389Sgabor * XXX: grep_search() is a workaround for speed up and should be
298210389Sgabor * removed in the future.  See fastgrep.c.
299210389Sgabor */
300210389Sgabor				if (fg_pattern[i].pattern) {
301210389Sgabor					r = grep_search(&fg_pattern[i],
302210389Sgabor					    (unsigned char *)l->dat,
303210389Sgabor					    l->len, &pmatch);
304210389Sgabor					r = (r == 0) ? 0 : REG_NOMATCH;
305210389Sgabor					st = pmatch.rm_eo;
306210389Sgabor				} else {
307210389Sgabor					r = regexec(&r_pattern[i], l->dat, 1,
308210389Sgabor					    &pmatch, eflags);
309210389Sgabor					r = (r == 0) ? 0 : REG_NOMATCH;
310210389Sgabor					st = pmatch.rm_eo;
311210389Sgabor				}
312210389Sgabor				if (r == REG_NOMATCH)
313210389Sgabor					continue;
314210389Sgabor				/* Check for full match */
315210389Sgabor				if (r == 0 && xflag)
316210389Sgabor					if (pmatch.rm_so != 0 ||
317210389Sgabor					    (size_t)pmatch.rm_eo != l->len)
318210389Sgabor						r = REG_NOMATCH;
319210389Sgabor				/* Check for whole word match */
320210389Sgabor				if (r == 0 && wflag && pmatch.rm_so != 0 &&
321210389Sgabor				    (size_t)pmatch.rm_eo != l->len) {
322210389Sgabor					wchar_t *wbegin;
323210389Sgabor					wint_t wend;
324210389Sgabor					size_t size;
325210389Sgabor
326210389Sgabor					size = mbstowcs(NULL, l->dat,
327210389Sgabor					    pmatch.rm_so);
328210389Sgabor
329210389Sgabor					if (size == ((size_t) - 1))
330210389Sgabor						r = REG_NOMATCH;
331210389Sgabor					else {
332210389Sgabor						wbegin = grep_malloc(size);
333210389Sgabor						if (mbstowcs(wbegin, l->dat,
334210389Sgabor						    pmatch.rm_so) == ((size_t) - 1))
335210389Sgabor							r = REG_NOMATCH;
336210389Sgabor						else if (sscanf(&l->dat[pmatch.rm_eo],
337210389Sgabor						    "%lc", &wend) != 1)
338210389Sgabor							r = REG_NOMATCH;
339210389Sgabor						else if (iswword(wbegin[wcslen(wbegin)]) ||
340210389Sgabor						    iswword(wend))
341210389Sgabor							r = REG_NOMATCH;
342210389Sgabor						free(wbegin);
343210389Sgabor					}
344210389Sgabor				}
345210389Sgabor				if (r == 0) {
346210389Sgabor					if (m == 0)
347210389Sgabor						c++;
348210389Sgabor					if (m < MAX_LINE_MATCHES)
349210389Sgabor						matches[m++] = pmatch;
350210389Sgabor					/* matches - skip further patterns */
351210461Sgabor					if ((color != NULL && !oflag) || qflag || lflag)
352210461Sgabor						break;
353210389Sgabor				}
354210389Sgabor			}
355210389Sgabor
356210389Sgabor			if (vflag) {
357210389Sgabor				c = !c;
358210389Sgabor				break;
359210389Sgabor			}
360210389Sgabor			/* One pass if we are not recording matches */
361210461Sgabor			if ((color != NULL && !oflag) || qflag || lflag)
362210389Sgabor				break;
363210389Sgabor
364210389Sgabor			if (st == (size_t)pmatch.rm_so)
365210389Sgabor				break; 	/* No matches */
366210389Sgabor		}
367210389Sgabor	} else
368210389Sgabor		c = !vflag;
369210389Sgabor
370210389Sgabor	if (c && binbehave == BINFILE_BIN && nottext)
371210389Sgabor		return (c); /* Binary file */
372210389Sgabor
373210389Sgabor	/* Dealing with the context */
374210479Sgabor	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
375210389Sgabor		if (c) {
376210389Sgabor			if (!first && !prev && !tail && Aflag)
377210389Sgabor				printf("--\n");
378210389Sgabor			tail = Aflag;
379210389Sgabor			if (Bflag > 0) {
380210389Sgabor				if (!first && !prev)
381210389Sgabor					printf("--\n");
382210389Sgabor				printqueue();
383210389Sgabor			}
384210389Sgabor			linesqueued = 0;
385210389Sgabor			printline(l, ':', matches, m);
386210389Sgabor		} else {
387210389Sgabor			printline(l, '-', matches, m);
388210389Sgabor			tail--;
389210389Sgabor		}
390210389Sgabor	}
391210389Sgabor
392210389Sgabor	if (c) {
393210389Sgabor		prev = true;
394210389Sgabor		first = false;
395210389Sgabor	} else
396210389Sgabor		prev = false;
397210389Sgabor
398210389Sgabor	return (c);
399210389Sgabor}
400210389Sgabor
/*
 * malloc() wrapper that never returns NULL: if the allocation fails the
 * program terminates through err() with exit status 2.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
413210389Sgabor
/*
 * calloc() wrapper that never returns NULL: if the allocation fails the
 * program terminates through err() with exit status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
426210389Sgabor
/*
 * realloc() wrapper that never returns NULL: if the reallocation fails the
 * program terminates through err() with exit status 2.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized;

	resized = realloc(ptr, size);
	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
438210389Sgabor
/*
 * strdup() wrapper that never returns NULL: if the copy cannot be
 * allocated the program terminates through err() with exit status 2.
 * The caller owns the returned string and releases it with free().
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
451210578Sgabor
452210578Sgabor/*
453210389Sgabor * Prints a matching line according to the command line options.
454210389Sgabor */
455210389Sgaborvoid
456210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m)
457210389Sgabor{
458210389Sgabor	size_t a = 0;
459210389Sgabor	int i, n = 0;
460210389Sgabor
461210389Sgabor	if (!hflag) {
462210389Sgabor		if (nullflag == 0)
463210389Sgabor			fputs(line->file, stdout);
464210389Sgabor		else {
465210389Sgabor			printf("%s", line->file);
466210389Sgabor			putchar(0);
467210389Sgabor		}
468210389Sgabor		++n;
469210389Sgabor	}
470210389Sgabor	if (nflag) {
471210389Sgabor		if (n > 0)
472210389Sgabor			putchar(sep);
473210389Sgabor		printf("%d", line->line_no);
474210389Sgabor		++n;
475210389Sgabor	}
476210389Sgabor	if (bflag) {
477210389Sgabor		if (n > 0)
478210389Sgabor			putchar(sep);
479210389Sgabor		printf("%lld", (long long)line->off);
480210389Sgabor		++n;
481210389Sgabor	}
482210389Sgabor	if (n)
483210389Sgabor		putchar(sep);
484210389Sgabor	/* --color and -o */
485210389Sgabor	if ((oflag || color) && m > 0) {
486210389Sgabor		for (i = 0; i < m; i++) {
487210389Sgabor			if (!oflag)
488210389Sgabor				fwrite(line->dat + a, matches[i].rm_so - a, 1,
489210389Sgabor				    stdout);
490210389Sgabor			if (color)
491210389Sgabor				fprintf(stdout, "\33[%sm\33[K", color);
492210389Sgabor
493210389Sgabor				fwrite(line->dat + matches[i].rm_so,
494210389Sgabor				    matches[i].rm_eo - matches[i].rm_so, 1,
495210389Sgabor				    stdout);
496210389Sgabor			if (color)
497210389Sgabor				fprintf(stdout, "\33[m\33[K");
498210389Sgabor			a = matches[i].rm_eo;
499210389Sgabor			if (oflag)
500210389Sgabor				putchar('\n');
501210389Sgabor		}
502210389Sgabor		if (!oflag) {
503210389Sgabor			if (line->len - a > 0)
504210389Sgabor				fwrite(line->dat + a, line->len - a, 1, stdout);
505210389Sgabor			putchar('\n');
506210389Sgabor		}
507210389Sgabor	} else {
508210389Sgabor		fwrite(line->dat, line->len, 1, stdout);
509210389Sgabor		putchar('\n');
510210389Sgabor	}
511210389Sgabor}
512