1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28/*	  All Rights Reserved  	*/
29
30/*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31/*	  All Rights Reserved	*/
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35/*
36 * fgrep -- print all lines containing any of a set of keywords
37 *
38 *	status returns:
39 *		0 - ok, and some matches
40 *		1 - ok, but no matches
41 *		2 - some error
42 */
43
44#include <stdio.h>
45#include <ctype.h>
46#include <sys/types.h>
47#include <stdlib.h>
48#include <string.h>
49#include <locale.h>
50#include <libintl.h>
51#include <euc.h>
52#include <sys/stat.h>
53#include <fcntl.h>
54
55#include <getwidth.h>
56
57eucwidth_t WW;
58#define	WIDTH1	WW._eucw1
59#define	WIDTH2	WW._eucw2
60#define	WIDTH3	WW._eucw3
61#define	MULTI_BYTE	WW._multibyte
62#define	GETONE(lc, p) \
63	cw = ISASCII(lc = (unsigned char)*p++) ? 1 :     \
64		(ISSET2(lc) ? WIDTH2 :                       \
65		(ISSET3(lc) ? WIDTH3 : WIDTH1));             \
66	if (--cw > --ccount) {                           \
67		cw -= ccount;                                \
68		while (ccount--)                             \
69			lc = (lc << 7) | ((*p++) & 0177);        \
70			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {    \
71			if (nlp == buf) {                        \
72				/* Increase the buffer size */       \
73				fw_lBufsiz += BUFSIZ;                \
74				if ((buf = realloc(buf,              \
75					fw_lBufsiz + BUFSIZ)) == NULL) { \
76					exit(2); /* out of memory */     \
77				}                                    \
78				nlp = buf;                           \
79				p = &buf[fw_lBufsiz];                \
80			} else {                                 \
81				/* shift the buffer contents down */ \
82				(void) memmove(buf, nlp,             \
83					&buf[fw_lBufsiz + BUFSIZ] - nlp);\
84				p -= nlp - buf;                      \
85				nlp = buf;                           \
86			}                                        \
87		}                                            \
88		if (p > &buf[fw_lBufsiz]) {                  \
89			if ((ccount = fread(p, sizeof (char),    \
90			    &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
91				<= 0) break;                         \
92		} else if ((ccount = fread(p,                \
93			sizeof (char),  BUFSIZ, fptr)) <= 0)     \
94			break;                                   \
95		blkno += (long long)ccount;                  \
96	}                                                \
97	ccount -= cw;                                    \
98	while (cw--)                                     \
99		lc = (lc << 7) | ((*p++) & 0177)
100
101/*
102 * The same() macro and letter() function were inserted to allow for
103 * the -i option work for the multi-byte environment.
104 */
105wchar_t letter();
106#define	same(a, b) \
107	(a == b || iflag && (!MULTI_BYTE || ISASCII(a)) && (a ^ b) == ' ' && \
108	letter(a) == letter(b))
109
110
111#define	QSIZE 400
112struct words {
113	wchar_t inp;
114	char	out;
115	struct	words *nst;
116	struct	words *link;
117	struct	words *fail;
118} *w = NULL, *smax, *q;
119
120FILE *fptr;
121long long lnum;
122int	bflag, cflag, lflag, fflag, nflag, vflag, xflag, eflag, sflag;
123int	hflag, iflag;
124int	retcode = 0;
125int	nfile;
126long long blkno;
127int	nsucc;
128long long tln;
129FILE	*wordf;
130char	*argptr;
131off_t input_size = 0;
132
133void	execute(char *);
134void	cgotofn(void);
135void	overflo(void);
136void	cfail(void);
137
138static long fw_lBufsiz = 0;
139
140int
141main(int argc, char **argv)
142{
143	int c;
144	int errflg = 0;
145	struct stat file_stat;
146
147	(void) setlocale(LC_ALL, "");
148#if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
149#define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
150#endif
151	(void) textdomain(TEXT_DOMAIN);
152
153	while ((c = getopt(argc, argv, "hybcie:f:lnvxs")) != EOF)
154		switch (c) {
155
156		case 's':
157			sflag++;
158			continue;
159		case 'h':
160			hflag++;
161			continue;
162		case 'b':
163			bflag++;
164			continue;
165
166		case 'i':
167		case 'y':
168			iflag++;
169			continue;
170
171		case 'c':
172			cflag++;
173			continue;
174
175		case 'e':
176			eflag++;
177			argptr = optarg;
178			input_size = strlen(argptr);
179			continue;
180
181		case 'f':
182			fflag++;
183			wordf = fopen(optarg, "r");
184			if (wordf == NULL) {
185				(void) fprintf(stderr,
186					gettext("fgrep: can't open %s\n"),
187					optarg);
188				exit(2);
189			}
190
191			if (fstat(fileno(wordf), &file_stat) == 0) {
192			    input_size = file_stat.st_size;
193			} else {
194				(void) fprintf(stderr,
195					gettext("fgrep: can't fstat %s\n"),
196					optarg);
197				exit(2);
198			}
199
200			continue;
201
202		case 'l':
203			lflag++;
204			continue;
205
206		case 'n':
207			nflag++;
208			continue;
209
210		case 'v':
211			vflag++;
212			continue;
213
214		case 'x':
215			xflag++;
216			continue;
217
218		case '?':
219			errflg++;
220	}
221
222	argc -= optind;
223	if (errflg || ((argc <= 0) && !fflag && !eflag)) {
224		(void) printf(gettext("usage: fgrep [ -bchilnsvx ] "
225			"[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
226		exit(2);
227	}
228	if (!eflag && !fflag) {
229		argptr = argv[optind];
230		input_size = strlen(argptr);
231		input_size++;
232		optind++;
233		argc--;
234	}
235
236/*
237 * Normally we need one struct words for each letter in the pattern
238 * plus one terminating struct words with outp = 1, but when -x option
239 * is specified we require one more struct words for `\n` character so we
240 * calculate the input_size as below. We add extra 1 because
241 * (input_size/2) rounds off odd numbers
242 */
243
244	if (xflag) {
245		input_size = input_size + (input_size/2) + 1;
246	}
247
248	input_size++;
249
250	w = (struct words *)calloc(input_size, sizeof (struct words));
251	if (w == NULL) {
252		(void) fprintf(stderr,
253			gettext("fgrep: could not allocate "
254				"memory for wordlist\n"));
255		exit(2);
256	}
257
258	getwidth(&WW);
259	if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
260		(WIDTH3 == 0)) {
261		/*
262		 * If non EUC-based locale,
263		 * assume WIDTH1 is 1.
264		 */
265		WIDTH1 = 1;
266	}
267	WIDTH2++;
268	WIDTH3++;
269
270	cgotofn();
271	cfail();
272	nfile = argc;
273	argv = &argv[optind];
274	if (argc <= 0) {
275		execute((char *)NULL);
276	} else
277		while (--argc >= 0) {
278			execute(*argv);
279			argv++;
280		}
281
282	if (w != NULL) {
283		free(w);
284	}
285
286	return (retcode != 0 ? retcode : nsucc == 0);
287}
288
289void
290execute(char *file)
291{
292	char *p;
293	struct words *c;
294	int ccount;
295	static char *buf = NULL;
296	int failed;
297	char *nlp;
298	wchar_t lc;
299	int cw;
300
301	if (buf == NULL) {
302		fw_lBufsiz = BUFSIZ;
303		if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
304			exit(2); /* out of memory */
305		}
306	}
307
308	if (file) {
309		if ((fptr = fopen(file, "r")) == NULL) {
310			(void) fprintf(stderr,
311				gettext("fgrep: can't open %s\n"), file);
312			retcode = 2;
313			return;
314		}
315	} else {
316		file = "<stdin>";
317		fptr = stdin;
318	}
319	ccount = 0;
320	failed = 0;
321	lnum = 1;
322	tln = 0;
323	blkno = 0;
324	p = buf;
325	nlp = p;
326	c = w;
327	for (;;) {
328		if (c == 0)
329			break;
330		if (ccount <= 0) {
331			if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
332				if (nlp == buf) {
333					/* increase the buffer size */
334					fw_lBufsiz += BUFSIZ;
335					if ((buf = realloc(buf,
336						fw_lBufsiz + BUFSIZ)) == NULL) {
337						exit(2); /* out of memory */
338					}
339					nlp = buf;
340					p = &buf[fw_lBufsiz];
341				} else {
342					/* shift the buffer down */
343					(void) memmove(buf, nlp,
344						&buf[fw_lBufsiz + BUFSIZ]
345						- nlp);
346					p -= nlp - buf;
347					nlp = buf;
348				}
349
350			}
351			if (p > &buf[fw_lBufsiz]) {
352				if ((ccount = fread(p, sizeof (char),
353					&buf[fw_lBufsiz + BUFSIZ] - p, fptr))
354					<= 0)
355					break;
356			} else if ((ccount = fread(p, sizeof (char),
357				BUFSIZ, fptr)) <= 0)
358				break;
359			blkno += (long long)ccount;
360		}
361		GETONE(lc, p);
362nstate:
363		if (same(c->inp, lc)) {
364			c = c->nst;
365		} else if (c->link != 0) {
366			c = c->link;
367			goto nstate;
368		} else {
369			c = c->fail;
370			failed = 1;
371			if (c == 0) {
372				c = w;
373istate:
374				if (same(c->inp, lc)) {
375					c = c->nst;
376				} else if (c->link != 0) {
377					c = c->link;
378					goto istate;
379				}
380			} else
381				goto nstate;
382		}
383
384		if (c == 0)
385			break;
386
387		if (c->out) {
388			while (lc != '\n') {
389				if (ccount <= 0) {
390if (p == &buf[fw_lBufsiz + BUFSIZ]) {
391	if (nlp == buf) {
392		/* increase buffer size */
393		fw_lBufsiz += BUFSIZ;
394		if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
395			exit(2); /* out of memory */
396		}
397		nlp = buf;
398		p = &buf[fw_lBufsiz];
399	} else {
400		/* shift buffer down */
401		(void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
402		p -= nlp - buf;
403		nlp = buf;
404	}
405}
406if (p > &buf[fw_lBufsiz]) {
407	if ((ccount = fread(p, sizeof (char),
408		&buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
409	} else if ((ccount = fread(p, sizeof (char), BUFSIZ,
410		fptr)) <= 0) break;
411		blkno += (long long)ccount;
412	}
413	GETONE(lc, p);
414}
415			if ((vflag && (failed == 0 || xflag == 0)) ||
416				(vflag == 0 && xflag && failed))
417				goto nomatch;
418succeed:
419			nsucc = 1;
420			if (cflag)
421				tln++;
422			else if (lflag && !sflag) {
423				(void) printf("%s\n", file);
424				(void) fclose(fptr);
425				return;
426			} else if (!sflag) {
427				if (nfile > 1 && !hflag)
428					(void) printf("%s:", file);
429				if (bflag)
430					(void) printf("%lld:",
431						(blkno - (long long)(ccount-1))
432						/ BUFSIZ);
433				if (nflag)
434					(void) printf("%lld:", lnum);
435				if (p <= nlp) {
436					while (nlp < &buf[fw_lBufsiz + BUFSIZ])
437						(void) putchar(*nlp++);
438					nlp = buf;
439				}
440				while (nlp < p)
441					(void) putchar(*nlp++);
442			}
443nomatch:
444			lnum++;
445			nlp = p;
446			c = w;
447			failed = 0;
448			continue;
449		}
450		if (lc == '\n')
451			if (vflag)
452				goto succeed;
453			else {
454				lnum++;
455				nlp = p;
456				c = w;
457				failed = 0;
458			}
459	}
460	(void) fclose(fptr);
461	if (cflag) {
462		if ((nfile > 1) && !hflag)
463			(void) printf("%s:", file);
464		(void) printf("%lld\n", tln);
465	}
466}
467
468
469wchar_t
470getargc(void)
471{
472	/* appends a newline to shell quoted argument list so */
473	/* the list looks like it came from an ed style file  */
474	wchar_t c;
475	int cw;
476	int b;
477	static int endflg;
478
479
480	if (wordf) {
481		if ((b = getc(wordf)) == EOF)
482			return (EOF);
483		cw = ISASCII(c = (wchar_t)b) ? 1 :
484			(ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
485		while (--cw) {
486			if ((b = getc(wordf)) == EOF)
487				return (EOF);
488			c = (c << 7) | (b & 0177);
489		}
490		return (iflag ? letter(c) : c);
491	}
492
493	if (endflg)
494		return (EOF);
495
496	{
497		cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
498			(ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
499
500		while (--cw)
501			c = (c << 7) | ((*argptr++) & 0177);
502		if (c == '\0') {
503			endflg++;
504			return ('\n');
505		}
506	}
507	return (iflag ? letter(c) : c);
508
509
510}
511
512void
513cgotofn(void)
514{
515	int c;
516	struct words *s;
517
518	s = smax = w;
519nword:
520	for (;;) {
521		c = getargc();
522		if (c == EOF)
523			return;
524		if (c == 0)
525			goto enter;
526		if (c == '\n') {
527			if (xflag) {
528				for (;;) {
529					if (s->inp == c) {
530						s = s->nst;
531						break;
532					}
533					if (s->inp == 0)
534						goto nenter;
535					if (s->link == 0) {
536						if (smax >= &w[input_size -1])
537							overflo();
538						s->link = ++smax;
539						s = smax;
540						goto nenter;
541					}
542					s = s->link;
543				}
544			}
545			s->out = 1;
546			s = w;
547		} else {
548loop:
549			if (s->inp == c) {
550				s = s->nst;
551				continue;
552			}
553			if (s->inp == 0)
554				goto enter;
555			if (s->link == 0) {
556				if (smax >= &w[input_size -1])
557					overflo();
558				s->link = ++smax;
559				s = smax;
560				goto enter;
561			}
562			s = s->link;
563			goto loop;
564		}
565	}
566
567enter:
568	do {
569		s->inp = c;
570		if (smax >= &w[input_size -1])
571			overflo();
572		s->nst = ++smax;
573		s = smax;
574	} while ((c = getargc()) != '\n' && c != EOF);
575	if (xflag) {
576nenter:
577		s->inp = '\n';
578		if (smax >= &w[input_size -1])
579			overflo();
580		s->nst = ++smax;
581	}
582	smax->out = 1;
583	s = w;
584	if (c != EOF)
585		goto nword;
586}
587
588/*
589 * This function is an unexpected condition, since input_size should have been
590 * calculated correctly before hand.
591 */
592
593void
594overflo(void)
595{
596	(void) fprintf(stderr, gettext("fgrep: wordlist too large\n"));
597	exit(2);
598}
599
600void
601cfail(void)
602{
603	int qsize = QSIZE;
604	struct words **queue = NULL;
605
606	/*
607	 * front and rear are pointers used to traverse the global words
608	 * structure "w" which contains the data of input pattern file
609	 */
610	struct words **front, **rear;
611	struct words *state;
612	unsigned long frontoffset = 0, rearoffset = 0;
613	char c;
614	struct words *s;
615	s = w;
616	if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
617				== NULL) {
618		perror("fgrep");
619		exit(2);
620	}
621	front = rear = queue;
622init:
623	if ((s->inp) != 0) {
624		*rear++ = s->nst;
625	/*
626	 * Reallocates the queue if the number of distinct starting
627	 * character of patterns exceeds the qsize value
628	 */
629		if (rear >= &queue[qsize - 1]) {
630			frontoffset = front - queue;
631			rearoffset = rear - queue;
632			qsize += QSIZE;
633			if ((queue = (struct words **)realloc(queue,
634				qsize * sizeof (struct words *))) == NULL) {
635				perror("fgrep");
636				exit(2);
637			}
638			front = queue + frontoffset;
639			rear = queue + rearoffset;
640		}
641	}
642	if ((s = s->link) != 0) {
643		goto init;
644	}
645
646	while (rear != front) {
647		s = *front++;
648cloop:
649		if ((c = s->inp) != 0) {
650			*rear++ = (q = s->nst);
651		/*
652		 * Reallocate the queue if the rear pointer reaches the end
653		 * queue
654		 */
655			if (rear >= &queue[qsize - 1]) {
656				frontoffset = front - queue;
657				rearoffset = rear - queue;
658				qsize += QSIZE;
659				if ((queue = (struct words **)realloc(queue,
660				    qsize * sizeof (struct words *))) == NULL) {
661					perror("fgrep");
662					exit(2);
663				}
664				front = queue + frontoffset;
665				rear = queue + rearoffset;
666			}
667			state = s->fail;
668floop:
669			if (state == 0)
670				state = w;
671			if (state->inp == c) {
672qloop:
673				q->fail = state->nst;
674				if ((state->nst)->out == 1)
675					q->out = 1;
676				if ((q = q->link) != 0)
677					goto qloop;
678			} else if ((state = state->link) != 0)
679				goto floop;
680		}
681		if ((s = s->link) != 0)
682			goto cloop;
683	}
684}
685
686wchar_t
687letter(wchar_t c)
688{
689	if (c >= 'a' && c <= 'z')
690		return (c);
691	if (c >= 'A' && c <= 'Z')
692		return (c + 'a' - 'A');
693	return (c);
694}
695