filter.c revision 1.2
1/* $OpenBSD: filter.c,v 1.2 2015/11/19 22:16:43 tedu Exp $ */
2
3/* filter - postprocessing of flex output through filters */
4
5/*  This file is part of flex. */
6
7/*  Redistribution and use in source and binary forms, with or without */
8/*  modification, are permitted provided that the following conditions */
9/*  are met: */
10
11/*  1. Redistributions of source code must retain the above copyright */
12/*     notice, this list of conditions and the following disclaimer. */
13/*  2. Redistributions in binary form must reproduce the above copyright */
14/*     notice, this list of conditions and the following disclaimer in the */
15/*     documentation and/or other materials provided with the distribution. */
16
17/*  Neither the name of the University nor the names of its contributors */
18/*  may be used to endorse or promote products derived from this software */
19/*  without specific prior written permission. */
20
21/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
22/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
23/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
24/*  PURPOSE. */
25
26#include "flexdef.h"
/* First line written to each m4 input stream by filter_tee_header().
 * Going by its text, it is meant to make an m4 that does not define
 * `__gnu__' print an error message and exit with status 2.
 */
static const char *check_4_gnu_m4 =
	"m4_dnl ifdef(`__gnu__', ,`errprint("
	"Flex requires GNU M4. "
	"Set the PATH or set the M4 environment variable to its path name."
	") m4exit(2)')\n";

/** Head of the global output filter chain; NULL until a filter is created. */
struct filter *output_chain = NULL;
35
36/* Allocate and initialize an external filter.
37 * @param chain the current chain or NULL for new chain
38 * @param cmd the command to execute.
39 * @param ... a NULL terminated list of (const char*) arguments to command,
40 *            not including argv[0].
41 * @return newest filter in chain
42 */
43struct filter *filter_create_ext (struct filter *chain, const char *cmd,
44				  ...)
45{
46	struct filter *f;
47	int     max_args;
48	const char *s;
49	va_list ap;
50
51	/* allocate and initialize new filter */
52	f = (struct filter *) flex_alloc (sizeof (struct filter));
53	if (!f)
54		flexerror (_("flex_alloc failed (f) in filter_create_ext"));
55	memset (f, 0, sizeof (*f));
56	f->filter_func = NULL;
57	f->extra = NULL;
58	f->next = NULL;
59	f->argc = 0;
60
61	if (chain != NULL) {
62		/* append f to end of chain */
63		while (chain->next)
64			chain = chain->next;
65		chain->next = f;
66	}
67
68
69	/* allocate argv, and populate it with the argument list. */
70	max_args = 8;
71	f->argv =
72		(const char **) flex_alloc (sizeof (char *) *
73					    (max_args + 1));
74	if (!f->argv)
75		flexerror (_("flex_alloc failed (f->argv) in filter_create_ext"));
76	f->argv[f->argc++] = cmd;
77
78	va_start (ap, cmd);
79	while ((s = va_arg (ap, const char *)) != NULL) {
80		if (f->argc >= max_args) {
81			max_args += 8;
82			f->argv =
83				(const char **) flex_realloc (f->argv,
84							      sizeof (char
85								      *) *
86							      (max_args +
87							       1));
88		}
89		f->argv[f->argc++] = s;
90	}
91	f->argv[f->argc] = NULL;
92
93	va_end (ap);
94	return f;
95}
96
97/* Allocate and initialize an internal filter.
98 * @param chain the current chain or NULL for new chain
99 * @param filter_func The function that will perform the filtering.
100 *        filter_func should return 0 if successful, and -1
101 *        if an error occurs -- or it can simply exit().
102 * @param extra optional user-defined data to pass to the filter.
103 * @return newest filter in chain
104 */
105struct filter *filter_create_int (struct filter *chain,
106				  int (*filter_func) (struct filter *),
107				  void *extra)
108{
109	struct filter *f;
110
111	/* allocate and initialize new filter */
112	f = (struct filter *) flex_alloc (sizeof (struct filter));
113	if (!f)
114		flexerror (_("flex_alloc failed in filter_create_int"));
115	memset (f, 0, sizeof (*f));
116	f->next = NULL;
117	f->argc = 0;
118	f->argv = NULL;
119
120	f->filter_func = filter_func;
121	f->extra = extra;
122
123	if (chain != NULL) {
124		/* append f to end of chain */
125		while (chain->next)
126			chain = chain->next;
127		chain->next = f;
128	}
129
130	return f;
131}
132
133/** Fork and exec entire filter chain.
134 *  @param chain The head of the chain.
135 *  @return true on success.
136 */
137bool filter_apply_chain (struct filter * chain)
138{
139	int     pid, pipes[2];
140	int     r;
141	const int readsz = 512;
142	char   *buf;
143
144
145	/* Tricky recursion, since we want to begin the chain
146	 * at the END. Why? Because we need all the forked processes
147	 * to be children of the main flex process.
148	 */
149	if (chain)
150		filter_apply_chain (chain->next);
151	else
152		return true;
153
154	/* Now we are the right-most unprocessed link in the chain.
155	 */
156
157	fflush (stdout);
158	fflush (stderr);
159
160
161	if (pipe (pipes) == -1)
162		flexerror (_("pipe failed"));
163
164	if ((pid = fork ()) == -1)
165		flexerror (_("fork failed"));
166
167	if (pid == 0) {
168		/* child */
169
170        /* We need stdin (the FILE* stdin) to connect to this new pipe.
171         * There is no portable way to set stdin to a new file descriptor,
172         * as stdin is not an lvalue on some systems (BSD).
173         * So we dup the new pipe onto the stdin descriptor and use a no-op fseek
174         * to sync the stream. This is a Hail Mary situation. It seems to work.
175         */
176		close (pipes[1]);
177clearerr(stdin);
178		if (dup2 (pipes[0], fileno (stdin)) == -1)
179			flexfatal (_("dup2(pipes[0],0)"));
180		close (pipes[0]);
181        fseek (stdin, 0, SEEK_CUR);
182
183		/* run as a filter, either internally or by exec */
184		if (chain->filter_func) {
185			int     r;
186
187			if ((r = chain->filter_func (chain)) == -1)
188				flexfatal (_("filter_func failed"));
189			exit (0);
190		}
191		else {
192			execvp (chain->argv[0],
193				(char **const) (chain->argv));
194            lerrsf_fatal ( _("exec of %s failed"),
195                    chain->argv[0]);
196		}
197
198		exit (1);
199	}
200
201	/* Parent */
202	close (pipes[0]);
203	if (dup2 (pipes[1], fileno (stdout)) == -1)
204		flexfatal (_("dup2(pipes[1],1)"));
205	close (pipes[1]);
206    fseek (stdout, 0, SEEK_CUR);
207
208	return true;
209}
210
211/** Truncate the chain to max_len number of filters.
212 * @param chain the current chain.
213 * @param max_len the maximum length of the chain.
214 * @return the resulting length of the chain.
215 */
216int filter_truncate (struct filter *chain, int max_len)
217{
218	int     len = 1;
219
220	if (!chain)
221		return 0;
222
223	while (chain->next && len < max_len) {
224		chain = chain->next;
225		++len;
226	}
227
228	chain->next = NULL;
229	return len;
230}
231
232/** Splits the chain in order to write to a header file.
233 *  Similar in spirit to the 'tee' program.
234 *  The header file name is in extra.
235 *  @return 0 (zero) on success, and -1 on failure.
236 */
237int filter_tee_header (struct filter *chain)
238{
239	/* This function reads from stdin and writes to both the C file and the
240	 * header file at the same time.
241	 */
242
243	const int readsz = 512;
244	char   *buf;
245	int     to_cfd = -1;
246	FILE   *to_c = NULL, *to_h = NULL;
247	bool    write_header;
248
249	write_header = (chain->extra != NULL);
250
251	/* Store a copy of the stdout pipe, which is already piped to C file
252	 * through the running chain. Then create a new pipe to the H file as
253	 * stdout, and fork the rest of the chain again.
254	 */
255
256	if ((to_cfd = dup (1)) == -1)
257		flexfatal (_("dup(1) failed"));
258	to_c = fdopen (to_cfd, "w");
259
260	if (write_header) {
261		if (freopen ((char *) chain->extra, "w", stdout) == NULL)
262			flexfatal (_("freopen(headerfilename) failed"));
263
264		filter_apply_chain (chain->next);
265		to_h = stdout;
266	}
267
268	/* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch.
269	 */
270
271	if (write_header) {
272        fputs (check_4_gnu_m4, to_h);
273		fputs ("m4_changecom`'m4_dnl\n", to_h);
274		fputs ("m4_changequote`'m4_dnl\n", to_h);
275		fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
276	    fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
277		fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
278		       to_h);
279		fprintf (to_h, "#ifndef %sHEADER_H\n", prefix);
280		fprintf (to_h, "#define %sHEADER_H 1\n", prefix);
281		fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix);
282		fprintf (to_h,
283			 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
284			 headerfilename ? headerfilename : "<stdout>");
285
286	}
287
288    fputs (check_4_gnu_m4, to_c);
289	fputs ("m4_changecom`'m4_dnl\n", to_c);
290	fputs ("m4_changequote`'m4_dnl\n", to_c);
291	fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
292	fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
293	fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
294		 outfilename ? outfilename : "<stdout>");
295
296	buf = (char *) flex_alloc (readsz);
297	if (!buf)
298		flexerror (_("flex_alloc failed in filter_tee_header"));
299	while (fgets (buf, readsz, stdin)) {
300		fputs (buf, to_c);
301		if (write_header)
302			fputs (buf, to_h);
303	}
304
305	if (write_header) {
306		fprintf (to_h, "\n");
307
308		/* write a fake line number. It will get fixed by the linedir filter. */
309		fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
310
311		fprintf (to_h, "#undef %sIN_HEADER\n", prefix);
312		fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix);
313		fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
314
315		fflush (to_h);
316		if (ferror (to_h))
317			lerrsf (_("error writing output file %s"),
318				(char *) chain->extra);
319
320		else if (fclose (to_h))
321			lerrsf (_("error closing output file %s"),
322				(char *) chain->extra);
323	}
324
325	fflush (to_c);
326	if (ferror (to_c))
327		lerrsf (_("error writing output file %s"),
328			outfilename ? outfilename : "<stdout>");
329
330	else if (fclose (to_c))
331		lerrsf (_("error closing output file %s"),
332			outfilename ? outfilename : "<stdout>");
333
334	while (wait (0) > 0) ;
335
336	exit (0);
337	return 0;
338}
339
340/** Adjust the line numbers in the #line directives of the generated scanner.
341 * After the m4 expansion, the line numbers are incorrect since the m4 macros
342 * can add or remove lines.  This only adjusts line numbers for generated code,
343 * not user code. This also happens to be a good place to squeeze multiple
344 * blank lines into a single blank line.
345 */
346int filter_fix_linedirs (struct filter *chain)
347{
348	char   *buf;
349	const int readsz = 512;
350	int     lineno = 1;
351	bool    in_gen = true;	/* in generated code */
352	bool    last_was_blank = false;
353
354	if (!chain)
355		return 0;
356
357	buf = (char *) flex_alloc (readsz);
358	if (!buf)
359		flexerror (_("flex_alloc failed in filter_fix_linedirs"));
360
361	while (fgets (buf, readsz, stdin)) {
362
363		regmatch_t m[10];
364
365		/* Check for #line directive. */
366		if (buf[0] == '#'
367			&& regexec (&regex_linedir, buf, 3, m, 0) == 0) {
368
369			int     num;
370			char   *fname;
371
372			/* extract the line number and filename */
373			num = regmatch_strtol (&m[1], buf, NULL, 0);
374			fname = regmatch_dup (&m[2], buf);
375
376			if (strcmp (fname,
377				outfilename ? outfilename : "<stdout>")
378					== 0
379			 || strcmp (fname,
380			 	headerfilename ? headerfilename : "<stdout>")
381					== 0) {
382
383				char    *s1, *s2;
384				char	filename[MAXLINE];
385
386				s1 = fname;
387				s2 = filename;
388
389				while ((s2 - filename) < (MAXLINE - 1) && *s1) {
390					/* Escape the backslash */
391					if (*s1 == '\\')
392						*s2++ = '\\';
393					/* Escape the double quote */
394					if (*s1 == '\"')
395						*s2++ = '\\';
396					/* Copy the character as usual */
397					*s2++ = *s1++;
398				}
399
400				*s2 = '\0';
401
402				/* Adjust the line directives. */
403				in_gen = true;
404				snprintf (buf, readsz, "#line %d \"%s\"\n",
405					  lineno + 1, filename);
406			}
407			else {
408				/* it's a #line directive for code we didn't write */
409				in_gen = false;
410			}
411
412			free (fname);
413			last_was_blank = false;
414		}
415
416		/* squeeze blank lines from generated code */
417		else if (in_gen
418			 && regexec (&regex_blank_line, buf, 0, NULL,
419				     0) == 0) {
420			if (last_was_blank)
421				continue;
422			else
423				last_was_blank = true;
424		}
425
426		else {
427			/* it's a line of normal, non-empty code. */
428			last_was_blank = false;
429		}
430
431		fputs (buf, stdout);
432		lineno++;
433	}
434	fflush (stdout);
435	if (ferror (stdout))
436		lerrsf (_("error writing output file %s"),
437			outfilename ? outfilename : "<stdout>");
438
439	else if (fclose (stdout))
440		lerrsf (_("error closing output file %s"),
441			outfilename ? outfilename : "<stdout>");
442
443	return 0;
444}
445