filter.c revision 1.2
1/* $OpenBSD: filter.c,v 1.2 2015/11/19 22:16:43 tedu Exp $ */ 2 3/* filter - postprocessing of flex output through filters */ 4 5/* This file is part of flex. */ 6 7/* Redistribution and use in source and binary forms, with or without */ 8/* modification, are permitted provided that the following conditions */ 9/* are met: */ 10 11/* 1. Redistributions of source code must retain the above copyright */ 12/* notice, this list of conditions and the following disclaimer. */ 13/* 2. Redistributions in binary form must reproduce the above copyright */ 14/* notice, this list of conditions and the following disclaimer in the */ 15/* documentation and/or other materials provided with the distribution. */ 16 17/* Neither the name of the University nor the names of its contributors */ 18/* may be used to endorse or promote products derived from this software */ 19/* without specific prior written permission. */ 20 21/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 22/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 23/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 24/* PURPOSE. */ 25 26#include "flexdef.h" 27static const char * check_4_gnu_m4 = 28 "m4_dnl ifdef(`__gnu__', ," 29 "`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)" 30 " m4exit(2)')\n"; 31 32 33/** global chain. */ 34struct filter *output_chain = NULL; 35 36/* Allocate and initialize an external filter. 37 * @param chain the current chain or NULL for new chain 38 * @param cmd the command to execute. 39 * @param ... a NULL terminated list of (const char*) arguments to command, 40 * not including argv[0]. 41 * @return newest filter in chain 42 */ 43struct filter *filter_create_ext (struct filter *chain, const char *cmd, 44 ...) 45{ 46 struct filter *f; 47 int max_args; 48 const char *s; 49 va_list ap; 50 51 /* allocate and initialize new filter */ 52 f = (struct filter *) flex_alloc (sizeof (struct filter)); 53 if (!f) 54 flexerror (_("flex_alloc failed (f) in filter_create_ext")); 55 memset (f, 0, sizeof (*f)); 56 f->filter_func = NULL; 57 f->extra = NULL; 58 f->next = NULL; 59 f->argc = 0; 60 61 if (chain != NULL) { 62 /* append f to end of chain */ 63 while (chain->next) 64 chain = chain->next; 65 chain->next = f; 66 } 67 68 69 /* allocate argv, and populate it with the argument list. */ 70 max_args = 8; 71 f->argv = 72 (const char **) flex_alloc (sizeof (char *) * 73 (max_args + 1)); 74 if (!f->argv) 75 flexerror (_("flex_alloc failed (f->argv) in filter_create_ext")); 76 f->argv[f->argc++] = cmd; 77 78 va_start (ap, cmd); 79 while ((s = va_arg (ap, const char *)) != NULL) { 80 if (f->argc >= max_args) { 81 max_args += 8; 82 f->argv = 83 (const char **) flex_realloc (f->argv, 84 sizeof (char 85 *) * 86 (max_args + 87 1)); 88 } 89 f->argv[f->argc++] = s; 90 } 91 f->argv[f->argc] = NULL; 92 93 va_end (ap); 94 return f; 95} 96 97/* Allocate and initialize an internal filter. 98 * @param chain the current chain or NULL for new chain 99 * @param filter_func The function that will perform the filtering. 100 * filter_func should return 0 if successful, and -1 101 * if an error occurs -- or it can simply exit(). 102 * @param extra optional user-defined data to pass to the filter. 103 * @return newest filter in chain 104 */ 105struct filter *filter_create_int (struct filter *chain, 106 int (*filter_func) (struct filter *), 107 void *extra) 108{ 109 struct filter *f; 110 111 /* allocate and initialize new filter */ 112 f = (struct filter *) flex_alloc (sizeof (struct filter)); 113 if (!f) 114 flexerror (_("flex_alloc failed in filter_create_int")); 115 memset (f, 0, sizeof (*f)); 116 f->next = NULL; 117 f->argc = 0; 118 f->argv = NULL; 119 120 f->filter_func = filter_func; 121 f->extra = extra; 122 123 if (chain != NULL) { 124 /* append f to end of chain */ 125 while (chain->next) 126 chain = chain->next; 127 chain->next = f; 128 } 129 130 return f; 131} 132 133/** Fork and exec entire filter chain. 134 * @param chain The head of the chain. 135 * @return true on success. 136 */ 137bool filter_apply_chain (struct filter * chain) 138{ 139 int pid, pipes[2]; 140 int r; 141 const int readsz = 512; 142 char *buf; 143 144 145 /* Tricky recursion, since we want to begin the chain 146 * at the END. Why? Because we need all the forked processes 147 * to be children of the main flex process. 148 */ 149 if (chain) 150 filter_apply_chain (chain->next); 151 else 152 return true; 153 154 /* Now we are the right-most unprocessed link in the chain. 155 */ 156 157 fflush (stdout); 158 fflush (stderr); 159 160 161 if (pipe (pipes) == -1) 162 flexerror (_("pipe failed")); 163 164 if ((pid = fork ()) == -1) 165 flexerror (_("fork failed")); 166 167 if (pid == 0) { 168 /* child */ 169 170 /* We need stdin (the FILE* stdin) to connect to this new pipe. 171 * There is no portable way to set stdin to a new file descriptor, 172 * as stdin is not an lvalue on some systems (BSD). 173 * So we dup the new pipe onto the stdin descriptor and use a no-op fseek 174 * to sync the stream. This is a Hail Mary situation. It seems to work. 175 */ 176 close (pipes[1]); 177clearerr(stdin); 178 if (dup2 (pipes[0], fileno (stdin)) == -1) 179 flexfatal (_("dup2(pipes[0],0)")); 180 close (pipes[0]); 181 fseek (stdin, 0, SEEK_CUR); 182 183 /* run as a filter, either internally or by exec */ 184 if (chain->filter_func) { 185 int r; 186 187 if ((r = chain->filter_func (chain)) == -1) 188 flexfatal (_("filter_func failed")); 189 exit (0); 190 } 191 else { 192 execvp (chain->argv[0], 193 (char **const) (chain->argv)); 194 lerrsf_fatal ( _("exec of %s failed"), 195 chain->argv[0]); 196 } 197 198 exit (1); 199 } 200 201 /* Parent */ 202 close (pipes[0]); 203 if (dup2 (pipes[1], fileno (stdout)) == -1) 204 flexfatal (_("dup2(pipes[1],1)")); 205 close (pipes[1]); 206 fseek (stdout, 0, SEEK_CUR); 207 208 return true; 209} 210 211/** Truncate the chain to max_len number of filters. 212 * @param chain the current chain. 213 * @param max_len the maximum length of the chain. 214 * @return the resulting length of the chain. 215 */ 216int filter_truncate (struct filter *chain, int max_len) 217{ 218 int len = 1; 219 220 if (!chain) 221 return 0; 222 223 while (chain->next && len < max_len) { 224 chain = chain->next; 225 ++len; 226 } 227 228 chain->next = NULL; 229 return len; 230} 231 232/** Splits the chain in order to write to a header file. 233 * Similar in spirit to the 'tee' program. 234 * The header file name is in extra. 235 * @return 0 (zero) on success, and -1 on failure. 236 */ 237int filter_tee_header (struct filter *chain) 238{ 239 /* This function reads from stdin and writes to both the C file and the 240 * header file at the same time. 241 */ 242 243 const int readsz = 512; 244 char *buf; 245 int to_cfd = -1; 246 FILE *to_c = NULL, *to_h = NULL; 247 bool write_header; 248 249 write_header = (chain->extra != NULL); 250 251 /* Store a copy of the stdout pipe, which is already piped to C file 252 * through the running chain. Then create a new pipe to the H file as 253 * stdout, and fork the rest of the chain again. 254 */ 255 256 if ((to_cfd = dup (1)) == -1) 257 flexfatal (_("dup(1) failed")); 258 to_c = fdopen (to_cfd, "w"); 259 260 if (write_header) { 261 if (freopen ((char *) chain->extra, "w", stdout) == NULL) 262 flexfatal (_("freopen(headerfilename) failed")); 263 264 filter_apply_chain (chain->next); 265 to_h = stdout; 266 } 267 268 /* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch. 269 */ 270 271 if (write_header) { 272 fputs (check_4_gnu_m4, to_h); 273 fputs ("m4_changecom`'m4_dnl\n", to_h); 274 fputs ("m4_changequote`'m4_dnl\n", to_h); 275 fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h); 276 fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h); 277 fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n", 278 to_h); 279 fprintf (to_h, "#ifndef %sHEADER_H\n", prefix); 280 fprintf (to_h, "#define %sHEADER_H 1\n", prefix); 281 fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix); 282 fprintf (to_h, 283 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 284 headerfilename ? headerfilename : "<stdout>"); 285 286 } 287 288 fputs (check_4_gnu_m4, to_c); 289 fputs ("m4_changecom`'m4_dnl\n", to_c); 290 fputs ("m4_changequote`'m4_dnl\n", to_c); 291 fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c); 292 fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c); 293 fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 294 outfilename ? outfilename : "<stdout>"); 295 296 buf = (char *) flex_alloc (readsz); 297 if (!buf) 298 flexerror (_("flex_alloc failed in filter_tee_header")); 299 while (fgets (buf, readsz, stdin)) { 300 fputs (buf, to_c); 301 if (write_header) 302 fputs (buf, to_h); 303 } 304 305 if (write_header) { 306 fprintf (to_h, "\n"); 307 308 /* write a fake line number. It will get fixed by the linedir filter. */ 309 fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n"); 310 311 fprintf (to_h, "#undef %sIN_HEADER\n", prefix); 312 fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix); 313 fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h); 314 315 fflush (to_h); 316 if (ferror (to_h)) 317 lerrsf (_("error writing output file %s"), 318 (char *) chain->extra); 319 320 else if (fclose (to_h)) 321 lerrsf (_("error closing output file %s"), 322 (char *) chain->extra); 323 } 324 325 fflush (to_c); 326 if (ferror (to_c)) 327 lerrsf (_("error writing output file %s"), 328 outfilename ? outfilename : "<stdout>"); 329 330 else if (fclose (to_c)) 331 lerrsf (_("error closing output file %s"), 332 outfilename ? outfilename : "<stdout>"); 333 334 while (wait (0) > 0) ; 335 336 exit (0); 337 return 0; 338} 339 340/** Adjust the line numbers in the #line directives of the generated scanner. 341 * After the m4 expansion, the line numbers are incorrect since the m4 macros 342 * can add or remove lines. This only adjusts line numbers for generated code, 343 * not user code. This also happens to be a good place to squeeze multiple 344 * blank lines into a single blank line. 345 */ 346int filter_fix_linedirs (struct filter *chain) 347{ 348 char *buf; 349 const int readsz = 512; 350 int lineno = 1; 351 bool in_gen = true; /* in generated code */ 352 bool last_was_blank = false; 353 354 if (!chain) 355 return 0; 356 357 buf = (char *) flex_alloc (readsz); 358 if (!buf) 359 flexerror (_("flex_alloc failed in filter_fix_linedirs")); 360 361 while (fgets (buf, readsz, stdin)) { 362 363 regmatch_t m[10]; 364 365 /* Check for #line directive. */ 366 if (buf[0] == '#' 367 && regexec (®ex_linedir, buf, 3, m, 0) == 0) { 368 369 int num; 370 char *fname; 371 372 /* extract the line number and filename */ 373 num = regmatch_strtol (&m[1], buf, NULL, 0); 374 fname = regmatch_dup (&m[2], buf); 375 376 if (strcmp (fname, 377 outfilename ? outfilename : "<stdout>") 378 == 0 379 || strcmp (fname, 380 headerfilename ? headerfilename : "<stdout>") 381 == 0) { 382 383 char *s1, *s2; 384 char filename[MAXLINE]; 385 386 s1 = fname; 387 s2 = filename; 388 389 while ((s2 - filename) < (MAXLINE - 1) && *s1) { 390 /* Escape the backslash */ 391 if (*s1 == '\\') 392 *s2++ = '\\'; 393 /* Escape the double quote */ 394 if (*s1 == '\"') 395 *s2++ = '\\'; 396 /* Copy the character as usual */ 397 *s2++ = *s1++; 398 } 399 400 *s2 = '\0'; 401 402 /* Adjust the line directives. */ 403 in_gen = true; 404 snprintf (buf, readsz, "#line %d \"%s\"\n", 405 lineno + 1, filename); 406 } 407 else { 408 /* it's a #line directive for code we didn't write */ 409 in_gen = false; 410 } 411 412 free (fname); 413 last_was_blank = false; 414 } 415 416 /* squeeze blank lines from generated code */ 417 else if (in_gen 418 && regexec (®ex_blank_line, buf, 0, NULL, 419 0) == 0) { 420 if (last_was_blank) 421 continue; 422 else 423 last_was_blank = true; 424 } 425 426 else { 427 /* it's a line of normal, non-empty code. */ 428 last_was_blank = false; 429 } 430 431 fputs (buf, stdout); 432 lineno++; 433 } 434 fflush (stdout); 435 if (ferror (stdout)) 436 lerrsf (_("error writing output file %s"), 437 outfilename ? outfilename : "<stdout>"); 438 439 else if (fclose (stdout)) 440 lerrsf (_("error closing output file %s"), 441 outfilename ? outfilename : "<stdout>"); 442 443 return 0; 444} 445