/* filter.c revision 250125 */
/* filter - postprocessing of flex output through filters */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */

#include "flexdef.h"
#include <string.h>
/* m4 text that filter_tee_header() writes as the first line of every
 * stream it produces.  The ifdef prints the "Flex requires GNU M4" message
 * via errprint and calls m4exit(2) when `__gnu__' is not defined.
 * NOTE(review): the leading m4_dnl presumably makes GNU m4, when run with
 * prefixed builtins, discard the whole line so only other m4
 * implementations evaluate the ifdef -- confirm against the m4 invocation.
 * Neither the pointer nor the text is ever modified, hence const on both.
 */
static const char *const check_4_gnu_m4 =
	"m4_dnl ifdef(`__gnu__', ,"
	"`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)"
	" m4exit(2)')\n";


/** global chain. */
struct filter *output_chain = NULL;

34228072Sbapt/* Allocate and initialize an external filter.
35228072Sbapt * @param chain the current chain or NULL for new chain
36228072Sbapt * @param cmd the command to execute.
37228072Sbapt * @param ... a NULL terminated list of (const char*) arguments to command,
38228072Sbapt *            not including argv[0].
39228072Sbapt * @return newest filter in chain
40228072Sbapt */
41228072Sbaptstruct filter *filter_create_ext (struct filter *chain, const char *cmd,
42228072Sbapt				  ...)
43228072Sbapt{
44228072Sbapt	struct filter *f;
45228072Sbapt	int     max_args;
46228072Sbapt	const char *s;
47228072Sbapt	va_list ap;
48228072Sbapt
49228072Sbapt	/* allocate and initialize new filter */
50228072Sbapt	f = (struct filter *) flex_alloc (sizeof (struct filter));
51250125Sjkim	if (!f)
52250125Sjkim		flexerror (_("flex_alloc failed (f) in filter_create_ext"));
53228072Sbapt	memset (f, 0, sizeof (*f));
54228072Sbapt	f->filter_func = NULL;
55228072Sbapt	f->extra = NULL;
56228072Sbapt	f->next = NULL;
57228072Sbapt	f->argc = 0;
58228072Sbapt
59228072Sbapt	if (chain != NULL) {
60228072Sbapt		/* append f to end of chain */
61228072Sbapt		while (chain->next)
62228072Sbapt			chain = chain->next;
63228072Sbapt		chain->next = f;
64228072Sbapt	}
65228072Sbapt
66228072Sbapt
67228072Sbapt	/* allocate argv, and populate it with the argument list. */
68228072Sbapt	max_args = 8;
69228072Sbapt	f->argv =
70228072Sbapt		(const char **) flex_alloc (sizeof (char *) *
71228072Sbapt					    (max_args + 1));
72250125Sjkim	if (!f->argv)
73250125Sjkim		flexerror (_("flex_alloc failed (f->argv) in filter_create_ext"));
74228072Sbapt	f->argv[f->argc++] = cmd;
75228072Sbapt
76228072Sbapt	va_start (ap, cmd);
77228072Sbapt	while ((s = va_arg (ap, const char *)) != NULL) {
78228072Sbapt		if (f->argc >= max_args) {
79228072Sbapt			max_args += 8;
80228072Sbapt			f->argv =
81228072Sbapt				(const char **) flex_realloc (f->argv,
82228072Sbapt							      sizeof (char
83228072Sbapt								      *) *
84228072Sbapt							      (max_args +
85228072Sbapt							       1));
86228072Sbapt		}
87228072Sbapt		f->argv[f->argc++] = s;
88228072Sbapt	}
89228072Sbapt	f->argv[f->argc] = NULL;
90228072Sbapt
91228072Sbapt	va_end (ap);
92228072Sbapt	return f;
93228072Sbapt}
94228072Sbapt
95228072Sbapt/* Allocate and initialize an internal filter.
96228072Sbapt * @param chain the current chain or NULL for new chain
97228072Sbapt * @param filter_func The function that will perform the filtering.
98228072Sbapt *        filter_func should return 0 if successful, and -1
99228072Sbapt *        if an error occurs -- or it can simply exit().
100228072Sbapt * @param extra optional user-defined data to pass to the filter.
101228072Sbapt * @return newest filter in chain
102228072Sbapt */
103228072Sbaptstruct filter *filter_create_int (struct filter *chain,
104228072Sbapt				  int (*filter_func) (struct filter *),
105228072Sbapt				  void *extra)
106228072Sbapt{
107228072Sbapt	struct filter *f;
108228072Sbapt
109228072Sbapt	/* allocate and initialize new filter */
110228072Sbapt	f = (struct filter *) flex_alloc (sizeof (struct filter));
111250125Sjkim	if (!f)
112250125Sjkim		flexerror (_("flex_alloc failed in filter_create_int"));
113228072Sbapt	memset (f, 0, sizeof (*f));
114228072Sbapt	f->next = NULL;
115228072Sbapt	f->argc = 0;
116228072Sbapt	f->argv = NULL;
117228072Sbapt
118228072Sbapt	f->filter_func = filter_func;
119228072Sbapt	f->extra = extra;
120228072Sbapt
121228072Sbapt	if (chain != NULL) {
122228072Sbapt		/* append f to end of chain */
123228072Sbapt		while (chain->next)
124228072Sbapt			chain = chain->next;
125228072Sbapt		chain->next = f;
126228072Sbapt	}
127228072Sbapt
128228072Sbapt	return f;
129228072Sbapt}
130228072Sbapt
131228072Sbapt/** Fork and exec entire filter chain.
132228072Sbapt *  @param chain The head of the chain.
133228072Sbapt *  @return true on success.
134228072Sbapt */
135228072Sbaptbool filter_apply_chain (struct filter * chain)
136228072Sbapt{
137228072Sbapt	int     pid, pipes[2];
138250125Sjkim	int     r;
139250125Sjkim	const int readsz = 512;
140250125Sjkim	char   *buf;
141228072Sbapt
142250125Sjkim
143228072Sbapt	/* Tricky recursion, since we want to begin the chain
144228072Sbapt	 * at the END. Why? Because we need all the forked processes
145228072Sbapt	 * to be children of the main flex process.
146228072Sbapt	 */
147228072Sbapt	if (chain)
148228072Sbapt		filter_apply_chain (chain->next);
149228072Sbapt	else
150228072Sbapt		return true;
151228072Sbapt
152228072Sbapt	/* Now we are the right-most unprocessed link in the chain.
153228072Sbapt	 */
154228072Sbapt
155228072Sbapt	fflush (stdout);
156228072Sbapt	fflush (stderr);
157228072Sbapt
158250125Sjkim
159228072Sbapt	if (pipe (pipes) == -1)
160228072Sbapt		flexerror (_("pipe failed"));
161228072Sbapt
162228072Sbapt	if ((pid = fork ()) == -1)
163228072Sbapt		flexerror (_("fork failed"));
164228072Sbapt
165228072Sbapt	if (pid == 0) {
166228072Sbapt		/* child */
167228072Sbapt
168228072Sbapt        /* We need stdin (the FILE* stdin) to connect to this new pipe.
169228072Sbapt         * There is no portable way to set stdin to a new file descriptor,
170228072Sbapt         * as stdin is not an lvalue on some systems (BSD).
171228072Sbapt         * So we dup the new pipe onto the stdin descriptor and use a no-op fseek
172228072Sbapt         * to sync the stream. This is a Hail Mary situation. It seems to work.
173228072Sbapt         */
174228072Sbapt		close (pipes[1]);
175250125Sjkimclearerr(stdin);
176228072Sbapt		if (dup2 (pipes[0], fileno (stdin)) == -1)
177228072Sbapt			flexfatal (_("dup2(pipes[0],0)"));
178228072Sbapt		close (pipes[0]);
179228072Sbapt        fseek (stdin, 0, SEEK_CUR);
180228072Sbapt
181228072Sbapt		/* run as a filter, either internally or by exec */
182228072Sbapt		if (chain->filter_func) {
183228072Sbapt			int     r;
184228072Sbapt
185228072Sbapt			if ((r = chain->filter_func (chain)) == -1)
186228072Sbapt				flexfatal (_("filter_func failed"));
187228072Sbapt			exit (0);
188228072Sbapt		}
189228072Sbapt		else {
190228072Sbapt			execvp (chain->argv[0],
191228072Sbapt				(char **const) (chain->argv));
192250125Sjkim            lerrsf_fatal ( _("exec of %s failed"),
193250125Sjkim                    chain->argv[0]);
194228072Sbapt		}
195228072Sbapt
196228072Sbapt		exit (1);
197228072Sbapt	}
198228072Sbapt
199228072Sbapt	/* Parent */
200228072Sbapt	close (pipes[0]);
201228072Sbapt	if (dup2 (pipes[1], fileno (stdout)) == -1)
202228072Sbapt		flexfatal (_("dup2(pipes[1],1)"));
203228072Sbapt	close (pipes[1]);
204228072Sbapt    fseek (stdout, 0, SEEK_CUR);
205228072Sbapt
206228072Sbapt	return true;
207228072Sbapt}
208228072Sbapt
209228072Sbapt/** Truncate the chain to max_len number of filters.
210228072Sbapt * @param chain the current chain.
211228072Sbapt * @param max_len the maximum length of the chain.
212228072Sbapt * @return the resulting length of the chain.
213228072Sbapt */
214228072Sbaptint filter_truncate (struct filter *chain, int max_len)
215228072Sbapt{
216228072Sbapt	int     len = 1;
217228072Sbapt
218228072Sbapt	if (!chain)
219228072Sbapt		return 0;
220228072Sbapt
221228072Sbapt	while (chain->next && len < max_len) {
222228072Sbapt		chain = chain->next;
223228072Sbapt		++len;
224228072Sbapt	}
225228072Sbapt
226228072Sbapt	chain->next = NULL;
227228072Sbapt	return len;
228228072Sbapt}
229228072Sbapt
230228072Sbapt/** Splits the chain in order to write to a header file.
231228072Sbapt *  Similar in spirit to the 'tee' program.
232228072Sbapt *  The header file name is in extra.
233228072Sbapt *  @return 0 (zero) on success, and -1 on failure.
234228072Sbapt */
235228072Sbaptint filter_tee_header (struct filter *chain)
236228072Sbapt{
237228072Sbapt	/* This function reads from stdin and writes to both the C file and the
238228072Sbapt	 * header file at the same time.
239228072Sbapt	 */
240228072Sbapt
241228072Sbapt	const int readsz = 512;
242228072Sbapt	char   *buf;
243228072Sbapt	int     to_cfd = -1;
244228072Sbapt	FILE   *to_c = NULL, *to_h = NULL;
245228072Sbapt	bool    write_header;
246228072Sbapt
247228072Sbapt	write_header = (chain->extra != NULL);
248228072Sbapt
249228072Sbapt	/* Store a copy of the stdout pipe, which is already piped to C file
250228072Sbapt	 * through the running chain. Then create a new pipe to the H file as
251228072Sbapt	 * stdout, and fork the rest of the chain again.
252228072Sbapt	 */
253228072Sbapt
254228072Sbapt	if ((to_cfd = dup (1)) == -1)
255228072Sbapt		flexfatal (_("dup(1) failed"));
256228072Sbapt	to_c = fdopen (to_cfd, "w");
257228072Sbapt
258228072Sbapt	if (write_header) {
259228072Sbapt		if (freopen ((char *) chain->extra, "w", stdout) == NULL)
260228072Sbapt			flexfatal (_("freopen(headerfilename) failed"));
261228072Sbapt
262228072Sbapt		filter_apply_chain (chain->next);
263228072Sbapt		to_h = stdout;
264228072Sbapt	}
265228072Sbapt
266228072Sbapt	/* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch.
267228072Sbapt	 */
268228072Sbapt
269228072Sbapt	if (write_header) {
270228072Sbapt        fputs (check_4_gnu_m4, to_h);
271228072Sbapt		fputs ("m4_changecom`'m4_dnl\n", to_h);
272228072Sbapt		fputs ("m4_changequote`'m4_dnl\n", to_h);
273228072Sbapt		fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
274228072Sbapt	    fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
275228072Sbapt		fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
276228072Sbapt		       to_h);
277228072Sbapt		fprintf (to_h, "#ifndef %sHEADER_H\n", prefix);
278228072Sbapt		fprintf (to_h, "#define %sHEADER_H 1\n", prefix);
279228072Sbapt		fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix);
280228072Sbapt		fprintf (to_h,
281228072Sbapt			 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
282228072Sbapt			 headerfilename ? headerfilename : "<stdout>");
283228072Sbapt
284228072Sbapt	}
285228072Sbapt
286228072Sbapt    fputs (check_4_gnu_m4, to_c);
287228072Sbapt	fputs ("m4_changecom`'m4_dnl\n", to_c);
288228072Sbapt	fputs ("m4_changequote`'m4_dnl\n", to_c);
289228072Sbapt	fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
290228072Sbapt	fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
291228072Sbapt	fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
292228072Sbapt		 outfilename ? outfilename : "<stdout>");
293228072Sbapt
294228072Sbapt	buf = (char *) flex_alloc (readsz);
295250125Sjkim	if (!buf)
296250125Sjkim		flexerror (_("flex_alloc failed in filter_tee_header"));
297228072Sbapt	while (fgets (buf, readsz, stdin)) {
298228072Sbapt		fputs (buf, to_c);
299228072Sbapt		if (write_header)
300228072Sbapt			fputs (buf, to_h);
301228072Sbapt	}
302228072Sbapt
303228072Sbapt	if (write_header) {
304228072Sbapt		fprintf (to_h, "\n");
305228072Sbapt
306228072Sbapt		/* write a fake line number. It will get fixed by the linedir filter. */
307228072Sbapt		fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
308228072Sbapt
309228072Sbapt		fprintf (to_h, "#undef %sIN_HEADER\n", prefix);
310228072Sbapt		fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix);
311228072Sbapt		fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
312228072Sbapt
313228072Sbapt		fflush (to_h);
314250125Sjkim		if (ferror (to_h))
315250125Sjkim			lerrsf (_("error writing output file %s"),
316250125Sjkim				(char *) chain->extra);
317228072Sbapt
318250125Sjkim		else if (fclose (to_h))
319250125Sjkim			lerrsf (_("error closing output file %s"),
320250125Sjkim				(char *) chain->extra);
321228072Sbapt	}
322228072Sbapt
323228072Sbapt	fflush (to_c);
324228072Sbapt	if (ferror (to_c))
325228072Sbapt		lerrsf (_("error writing output file %s"),
326228072Sbapt			outfilename ? outfilename : "<stdout>");
327228072Sbapt
328228072Sbapt	else if (fclose (to_c))
329228072Sbapt		lerrsf (_("error closing output file %s"),
330228072Sbapt			outfilename ? outfilename : "<stdout>");
331228072Sbapt
332228072Sbapt	while (wait (0) > 0) ;
333228072Sbapt
334228072Sbapt	exit (0);
335228072Sbapt	return 0;
336228072Sbapt}
337228072Sbapt
338228072Sbapt/** Adjust the line numbers in the #line directives of the generated scanner.
339228072Sbapt * After the m4 expansion, the line numbers are incorrect since the m4 macros
340228072Sbapt * can add or remove lines.  This only adjusts line numbers for generated code,
341228072Sbapt * not user code. This also happens to be a good place to squeeze multiple
342228072Sbapt * blank lines into a single blank line.
343228072Sbapt */
344228072Sbaptint filter_fix_linedirs (struct filter *chain)
345228072Sbapt{
346228072Sbapt	char   *buf;
347228072Sbapt	const int readsz = 512;
348228072Sbapt	int     lineno = 1;
349228072Sbapt	bool    in_gen = true;	/* in generated code */
350228072Sbapt	bool    last_was_blank = false;
351228072Sbapt
352228072Sbapt	if (!chain)
353228072Sbapt		return 0;
354228072Sbapt
355228072Sbapt	buf = (char *) flex_alloc (readsz);
356250125Sjkim	if (!buf)
357250125Sjkim		flexerror (_("flex_alloc failed in filter_fix_linedirs"));
358228072Sbapt
359228072Sbapt	while (fgets (buf, readsz, stdin)) {
360228072Sbapt
361228072Sbapt		regmatch_t m[10];
362228072Sbapt
363228072Sbapt		/* Check for #line directive. */
364228072Sbapt		if (buf[0] == '#'
365250125Sjkim			&& regexec (&regex_linedir, buf, 3, m, 0) == 0) {
366228072Sbapt
367228072Sbapt			int     num;
368228072Sbapt			char   *fname;
369228072Sbapt
370228072Sbapt			/* extract the line number and filename */
371228072Sbapt			num = regmatch_strtol (&m[1], buf, NULL, 0);
372228072Sbapt			fname = regmatch_dup (&m[2], buf);
373228072Sbapt
374228072Sbapt			if (strcmp (fname,
375228072Sbapt				outfilename ? outfilename : "<stdout>")
376228072Sbapt					== 0
377228072Sbapt			 || strcmp (fname,
378228072Sbapt			 	headerfilename ? headerfilename : "<stdout>")
379228072Sbapt					== 0) {
380228072Sbapt
381228072Sbapt				char    *s1, *s2;
382228072Sbapt				char	filename[MAXLINE];
383228072Sbapt
384228072Sbapt				s1 = fname;
385228072Sbapt				s2 = filename;
386228072Sbapt
387228072Sbapt				while ((s2 - filename) < (MAXLINE - 1) && *s1) {
388228072Sbapt					/* Escape the backslash */
389228072Sbapt					if (*s1 == '\\')
390228072Sbapt						*s2++ = '\\';
391228072Sbapt					/* Escape the double quote */
392228072Sbapt					if (*s1 == '\"')
393228072Sbapt						*s2++ = '\\';
394228072Sbapt					/* Copy the character as usual */
395228072Sbapt					*s2++ = *s1++;
396228072Sbapt				}
397228072Sbapt
398228072Sbapt				*s2 = '\0';
399228072Sbapt
400228072Sbapt				/* Adjust the line directives. */
401228072Sbapt				in_gen = true;
402228072Sbapt				snprintf (buf, readsz, "#line %d \"%s\"\n",
403228072Sbapt					  lineno + 1, filename);
404228072Sbapt			}
405228072Sbapt			else {
406228072Sbapt				/* it's a #line directive for code we didn't write */
407228072Sbapt				in_gen = false;
408228072Sbapt			}
409228072Sbapt
410228072Sbapt			free (fname);
411228072Sbapt			last_was_blank = false;
412228072Sbapt		}
413228072Sbapt
414228072Sbapt		/* squeeze blank lines from generated code */
415228072Sbapt		else if (in_gen
416228072Sbapt			 && regexec (&regex_blank_line, buf, 0, NULL,
417228072Sbapt				     0) == 0) {
418228072Sbapt			if (last_was_blank)
419228072Sbapt				continue;
420228072Sbapt			else
421228072Sbapt				last_was_blank = true;
422228072Sbapt		}
423228072Sbapt
424228072Sbapt		else {
425228072Sbapt			/* it's a line of normal, non-empty code. */
426228072Sbapt			last_was_blank = false;
427228072Sbapt		}
428228072Sbapt
429228072Sbapt		fputs (buf, stdout);
430228072Sbapt		lineno++;
431228072Sbapt	}
432228072Sbapt	fflush (stdout);
433228072Sbapt	if (ferror (stdout))
434228072Sbapt		lerrsf (_("error writing output file %s"),
435228072Sbapt			outfilename ? outfilename : "<stdout>");
436228072Sbapt
437228072Sbapt	else if (fclose (stdout))
438228072Sbapt		lerrsf (_("error closing output file %s"),
439228072Sbapt			outfilename ? outfilename : "<stdout>");
440228072Sbapt
441228072Sbapt	return 0;
442228072Sbapt}
443228072Sbapt
/* vim:set expandtab cindent tabstop=4 softtabstop=4 shiftwidth=4 textwidth=0: */