filter.c revision 228072
1/* filter - postprocessing of flex output through filters */
2
3/*  This file is part of flex. */
4
5/*  Redistribution and use in source and binary forms, with or without */
6/*  modification, are permitted provided that the following conditions */
7/*  are met: */
8
9/*  1. Redistributions of source code must retain the above copyright */
10/*     notice, this list of conditions and the following disclaimer. */
11/*  2. Redistributions in binary form must reproduce the above copyright */
12/*     notice, this list of conditions and the following disclaimer in the */
13/*     documentation and/or other materials provided with the distribution. */
14
15/*  Neither the name of the University nor the names of its contributors */
16/*  may be used to endorse or promote products derived from this software */
17/*  without specific prior written permission. */
18
19/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
20/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
21/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
22/*  PURPOSE. */
23
24#include "flexdef.h"
/* m4 prologue that filter_tee_header() writes at the very top of each
 * output stream.  The embedded errprint text tells the user that GNU m4
 * is required and how to point flex at it.
 */
static const char *check_4_gnu_m4 =
	"m4_dnl ifdef(`__gnu__', ,"
	"`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)"
	" m4exit(2)')\n";

/** Head of the global output filter chain; starts out empty. */
struct filter *output_chain = NULL;
33
34/* Allocate and initialize an external filter.
35 * @param chain the current chain or NULL for new chain
36 * @param cmd the command to execute.
37 * @param ... a NULL terminated list of (const char*) arguments to command,
38 *            not including argv[0].
39 * @return newest filter in chain
40 */
41struct filter *filter_create_ext (struct filter *chain, const char *cmd,
42				  ...)
43{
44	struct filter *f;
45	int     max_args;
46	const char *s;
47	va_list ap;
48
49	/* allocate and initialize new filter */
50	f = (struct filter *) flex_alloc (sizeof (struct filter));
51	memset (f, 0, sizeof (*f));
52	f->filter_func = NULL;
53	f->extra = NULL;
54	f->next = NULL;
55	f->argc = 0;
56
57	if (chain != NULL) {
58		/* append f to end of chain */
59		while (chain->next)
60			chain = chain->next;
61		chain->next = f;
62	}
63
64
65	/* allocate argv, and populate it with the argument list. */
66	max_args = 8;
67	f->argv =
68		(const char **) flex_alloc (sizeof (char *) *
69					    (max_args + 1));
70	f->argv[f->argc++] = cmd;
71
72	va_start (ap, cmd);
73	while ((s = va_arg (ap, const char *)) != NULL) {
74		if (f->argc >= max_args) {
75			max_args += 8;
76			f->argv =
77				(const char **) flex_realloc (f->argv,
78							      sizeof (char
79								      *) *
80							      (max_args +
81							       1));
82		}
83		f->argv[f->argc++] = s;
84	}
85	f->argv[f->argc] = NULL;
86
87	va_end (ap);
88	return f;
89}
90
91/* Allocate and initialize an internal filter.
92 * @param chain the current chain or NULL for new chain
93 * @param filter_func The function that will perform the filtering.
94 *        filter_func should return 0 if successful, and -1
95 *        if an error occurs -- or it can simply exit().
96 * @param extra optional user-defined data to pass to the filter.
97 * @return newest filter in chain
98 */
99struct filter *filter_create_int (struct filter *chain,
100				  int (*filter_func) (struct filter *),
101				  void *extra)
102{
103	struct filter *f;
104
105	/* allocate and initialize new filter */
106	f = (struct filter *) flex_alloc (sizeof (struct filter));
107	memset (f, 0, sizeof (*f));
108	f->next = NULL;
109	f->argc = 0;
110	f->argv = NULL;
111
112	f->filter_func = filter_func;
113	f->extra = extra;
114
115	if (chain != NULL) {
116		/* append f to end of chain */
117		while (chain->next)
118			chain = chain->next;
119		chain->next = f;
120	}
121
122	return f;
123}
124
125/** Fork and exec entire filter chain.
126 *  @param chain The head of the chain.
127 *  @return true on success.
128 */
129bool filter_apply_chain (struct filter * chain)
130{
131	int     pid, pipes[2];
132
133	/* Tricky recursion, since we want to begin the chain
134	 * at the END. Why? Because we need all the forked processes
135	 * to be children of the main flex process.
136	 */
137	if (chain)
138		filter_apply_chain (chain->next);
139	else
140		return true;
141
142	/* Now we are the right-most unprocessed link in the chain.
143	 */
144
145	fflush (stdout);
146	fflush (stderr);
147
148	if (pipe (pipes) == -1)
149		flexerror (_("pipe failed"));
150
151	if ((pid = fork ()) == -1)
152		flexerror (_("fork failed"));
153
154	if (pid == 0) {
155		/* child */
156
157        /* We need stdin (the FILE* stdin) to connect to this new pipe.
158         * There is no portable way to set stdin to a new file descriptor,
159         * as stdin is not an lvalue on some systems (BSD).
160         * So we dup the new pipe onto the stdin descriptor and use a no-op fseek
161         * to sync the stream. This is a Hail Mary situation. It seems to work.
162         */
163		close (pipes[1]);
164		if (dup2 (pipes[0], fileno (stdin)) == -1)
165			flexfatal (_("dup2(pipes[0],0)"));
166		close (pipes[0]);
167        fseek (stdin, 0, SEEK_CUR);
168
169		/* run as a filter, either internally or by exec */
170		if (chain->filter_func) {
171			int     r;
172
173			if ((r = chain->filter_func (chain)) == -1)
174				flexfatal (_("filter_func failed"));
175			exit (0);
176		}
177		else {
178			execvp (chain->argv[0],
179				(char **const) (chain->argv));
180			flexfatal (_("exec failed"));
181		}
182
183		exit (1);
184	}
185
186	/* Parent */
187	close (pipes[0]);
188	if (dup2 (pipes[1], fileno (stdout)) == -1)
189		flexfatal (_("dup2(pipes[1],1)"));
190	close (pipes[1]);
191    fseek (stdout, 0, SEEK_CUR);
192
193	return true;
194}
195
196/** Truncate the chain to max_len number of filters.
197 * @param chain the current chain.
198 * @param max_len the maximum length of the chain.
199 * @return the resulting length of the chain.
200 */
201int filter_truncate (struct filter *chain, int max_len)
202{
203	int     len = 1;
204
205	if (!chain)
206		return 0;
207
208	while (chain->next && len < max_len) {
209		chain = chain->next;
210		++len;
211	}
212
213	chain->next = NULL;
214	return len;
215}
216
217/** Splits the chain in order to write to a header file.
218 *  Similar in spirit to the 'tee' program.
219 *  The header file name is in extra.
220 *  @return 0 (zero) on success, and -1 on failure.
221 */
222int filter_tee_header (struct filter *chain)
223{
224	/* This function reads from stdin and writes to both the C file and the
225	 * header file at the same time.
226	 */
227
228	const int readsz = 512;
229	char   *buf;
230	int     to_cfd = -1;
231	FILE   *to_c = NULL, *to_h = NULL;
232	bool    write_header;
233
234	write_header = (chain->extra != NULL);
235
236	/* Store a copy of the stdout pipe, which is already piped to C file
237	 * through the running chain. Then create a new pipe to the H file as
238	 * stdout, and fork the rest of the chain again.
239	 */
240
241	if ((to_cfd = dup (1)) == -1)
242		flexfatal (_("dup(1) failed"));
243	to_c = fdopen (to_cfd, "w");
244
245	if (write_header) {
246		if (freopen ((char *) chain->extra, "w", stdout) == NULL)
247			flexfatal (_("freopen(headerfilename) failed"));
248
249		filter_apply_chain (chain->next);
250		to_h = stdout;
251	}
252
253	/* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch.
254	 */
255
256	if (write_header) {
257        fputs (check_4_gnu_m4, to_h);
258		fputs ("m4_changecom`'m4_dnl\n", to_h);
259		fputs ("m4_changequote`'m4_dnl\n", to_h);
260		fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
261	    fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
262		fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
263		       to_h);
264		fprintf (to_h, "#ifndef %sHEADER_H\n", prefix);
265		fprintf (to_h, "#define %sHEADER_H 1\n", prefix);
266		fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix);
267		fprintf (to_h,
268			 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
269			 headerfilename ? headerfilename : "<stdout>");
270
271	}
272
273    fputs (check_4_gnu_m4, to_c);
274	fputs ("m4_changecom`'m4_dnl\n", to_c);
275	fputs ("m4_changequote`'m4_dnl\n", to_c);
276	fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
277	fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
278	fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
279		 outfilename ? outfilename : "<stdout>");
280
281	buf = (char *) flex_alloc (readsz);
282	while (fgets (buf, readsz, stdin)) {
283		fputs (buf, to_c);
284		if (write_header)
285			fputs (buf, to_h);
286	}
287
288	if (write_header) {
289		fprintf (to_h, "\n");
290
291		/* write a fake line number. It will get fixed by the linedir filter. */
292		fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
293
294		fprintf (to_h, "#undef %sIN_HEADER\n", prefix);
295		fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix);
296		fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
297
298		fflush (to_h);
299	    if (ferror (to_h))
300		    lerrsf (_("error writing output file %s"),
301                (char *) chain->extra);
302
303    	else if (fclose (to_h))
304	    	lerrsf (_("error closing output file %s"),
305                (char *) chain->extra);
306	}
307
308	fflush (to_c);
309	if (ferror (to_c))
310		lerrsf (_("error writing output file %s"),
311			outfilename ? outfilename : "<stdout>");
312
313	else if (fclose (to_c))
314		lerrsf (_("error closing output file %s"),
315			outfilename ? outfilename : "<stdout>");
316
317	while (wait (0) > 0) ;
318
319	exit (0);
320	return 0;
321}
322
323/** Adjust the line numbers in the #line directives of the generated scanner.
324 * After the m4 expansion, the line numbers are incorrect since the m4 macros
325 * can add or remove lines.  This only adjusts line numbers for generated code,
326 * not user code. This also happens to be a good place to squeeze multiple
327 * blank lines into a single blank line.
328 */
329int filter_fix_linedirs (struct filter *chain)
330{
331	char   *buf;
332	const int readsz = 512;
333	int     lineno = 1;
334	bool    in_gen = true;	/* in generated code */
335	bool    last_was_blank = false;
336
337	if (!chain)
338		return 0;
339
340	buf = (char *) flex_alloc (readsz);
341
342	while (fgets (buf, readsz, stdin)) {
343
344		regmatch_t m[10];
345
346		/* Check for #line directive. */
347		if (buf[0] == '#'
348		    && regexec (&regex_linedir, buf, 3, m, 0) == 0) {
349
350			int     num;
351			char   *fname;
352
353			/* extract the line number and filename */
354			num = regmatch_strtol (&m[1], buf, NULL, 0);
355			fname = regmatch_dup (&m[2], buf);
356
357			if (strcmp (fname,
358				outfilename ? outfilename : "<stdout>")
359					== 0
360			 || strcmp (fname,
361			 	headerfilename ? headerfilename : "<stdout>")
362					== 0) {
363
364				char    *s1, *s2;
365				char	filename[MAXLINE];
366
367				s1 = fname;
368				s2 = filename;
369
370				while ((s2 - filename) < (MAXLINE - 1) && *s1) {
371					/* Escape the backslash */
372					if (*s1 == '\\')
373						*s2++ = '\\';
374					/* Escape the double quote */
375					if (*s1 == '\"')
376						*s2++ = '\\';
377					/* Copy the character as usual */
378					*s2++ = *s1++;
379				}
380
381				*s2 = '\0';
382
383				/* Adjust the line directives. */
384				in_gen = true;
385				snprintf (buf, readsz, "#line %d \"%s\"\n",
386					  lineno + 1, filename);
387			}
388			else {
389				/* it's a #line directive for code we didn't write */
390				in_gen = false;
391			}
392
393			free (fname);
394			last_was_blank = false;
395		}
396
397		/* squeeze blank lines from generated code */
398		else if (in_gen
399			 && regexec (&regex_blank_line, buf, 0, NULL,
400				     0) == 0) {
401			if (last_was_blank)
402				continue;
403			else
404				last_was_blank = true;
405		}
406
407		else {
408			/* it's a line of normal, non-empty code. */
409			last_was_blank = false;
410		}
411
412		fputs (buf, stdout);
413		lineno++;
414	}
415	fflush (stdout);
416	if (ferror (stdout))
417		lerrsf (_("error writing output file %s"),
418			outfilename ? outfilename : "<stdout>");
419
420	else if (fclose (stdout))
421		lerrsf (_("error closing output file %s"),
422			outfilename ? outfilename : "<stdout>");
423
424	return 0;
425}
426
427/* vim:set expandtab cindent tabstop=4 softtabstop=4 shiftwidth=4 textwidth=0: */
428