/* filter.c revision 250125 */
1228072Sbapt/* filter - postprocessing of flex output through filters */ 2228072Sbapt 3228072Sbapt/* This file is part of flex. */ 4228072Sbapt 5228072Sbapt/* Redistribution and use in source and binary forms, with or without */ 6228072Sbapt/* modification, are permitted provided that the following conditions */ 7228072Sbapt/* are met: */ 8228072Sbapt 9228072Sbapt/* 1. Redistributions of source code must retain the above copyright */ 10228072Sbapt/* notice, this list of conditions and the following disclaimer. */ 11228072Sbapt/* 2. Redistributions in binary form must reproduce the above copyright */ 12228072Sbapt/* notice, this list of conditions and the following disclaimer in the */ 13228072Sbapt/* documentation and/or other materials provided with the distribution. */ 14228072Sbapt 15228072Sbapt/* Neither the name of the University nor the names of its contributors */ 16228072Sbapt/* may be used to endorse or promote products derived from this software */ 17228072Sbapt/* without specific prior written permission. */ 18228072Sbapt 19228072Sbapt/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 20228072Sbapt/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 21228072Sbapt/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 22228072Sbapt/* PURPOSE. */ 23228072Sbapt 24228072Sbapt#include "flexdef.h" 25228072Sbaptstatic const char * check_4_gnu_m4 = 26228072Sbapt "m4_dnl ifdef(`__gnu__', ," 27228072Sbapt "`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)" 28228072Sbapt " m4exit(2)')\n"; 29228072Sbapt 30228072Sbapt 31228072Sbapt/** global chain. */ 32228072Sbaptstruct filter *output_chain = NULL; 33228072Sbapt 34228072Sbapt/* Allocate and initialize an external filter. 35228072Sbapt * @param chain the current chain or NULL for new chain 36228072Sbapt * @param cmd the command to execute. 37228072Sbapt * @param ... 
a NULL terminated list of (const char*) arguments to command, 38228072Sbapt * not including argv[0]. 39228072Sbapt * @return newest filter in chain 40228072Sbapt */ 41228072Sbaptstruct filter *filter_create_ext (struct filter *chain, const char *cmd, 42228072Sbapt ...) 43228072Sbapt{ 44228072Sbapt struct filter *f; 45228072Sbapt int max_args; 46228072Sbapt const char *s; 47228072Sbapt va_list ap; 48228072Sbapt 49228072Sbapt /* allocate and initialize new filter */ 50228072Sbapt f = (struct filter *) flex_alloc (sizeof (struct filter)); 51250125Sjkim if (!f) 52250125Sjkim flexerror (_("flex_alloc failed (f) in filter_create_ext")); 53228072Sbapt memset (f, 0, sizeof (*f)); 54228072Sbapt f->filter_func = NULL; 55228072Sbapt f->extra = NULL; 56228072Sbapt f->next = NULL; 57228072Sbapt f->argc = 0; 58228072Sbapt 59228072Sbapt if (chain != NULL) { 60228072Sbapt /* append f to end of chain */ 61228072Sbapt while (chain->next) 62228072Sbapt chain = chain->next; 63228072Sbapt chain->next = f; 64228072Sbapt } 65228072Sbapt 66228072Sbapt 67228072Sbapt /* allocate argv, and populate it with the argument list. 
*/ 68228072Sbapt max_args = 8; 69228072Sbapt f->argv = 70228072Sbapt (const char **) flex_alloc (sizeof (char *) * 71228072Sbapt (max_args + 1)); 72250125Sjkim if (!f->argv) 73250125Sjkim flexerror (_("flex_alloc failed (f->argv) in filter_create_ext")); 74228072Sbapt f->argv[f->argc++] = cmd; 75228072Sbapt 76228072Sbapt va_start (ap, cmd); 77228072Sbapt while ((s = va_arg (ap, const char *)) != NULL) { 78228072Sbapt if (f->argc >= max_args) { 79228072Sbapt max_args += 8; 80228072Sbapt f->argv = 81228072Sbapt (const char **) flex_realloc (f->argv, 82228072Sbapt sizeof (char 83228072Sbapt *) * 84228072Sbapt (max_args + 85228072Sbapt 1)); 86228072Sbapt } 87228072Sbapt f->argv[f->argc++] = s; 88228072Sbapt } 89228072Sbapt f->argv[f->argc] = NULL; 90228072Sbapt 91228072Sbapt va_end (ap); 92228072Sbapt return f; 93228072Sbapt} 94228072Sbapt 95228072Sbapt/* Allocate and initialize an internal filter. 96228072Sbapt * @param chain the current chain or NULL for new chain 97228072Sbapt * @param filter_func The function that will perform the filtering. 98228072Sbapt * filter_func should return 0 if successful, and -1 99228072Sbapt * if an error occurs -- or it can simply exit(). 100228072Sbapt * @param extra optional user-defined data to pass to the filter. 
101228072Sbapt * @return newest filter in chain 102228072Sbapt */ 103228072Sbaptstruct filter *filter_create_int (struct filter *chain, 104228072Sbapt int (*filter_func) (struct filter *), 105228072Sbapt void *extra) 106228072Sbapt{ 107228072Sbapt struct filter *f; 108228072Sbapt 109228072Sbapt /* allocate and initialize new filter */ 110228072Sbapt f = (struct filter *) flex_alloc (sizeof (struct filter)); 111250125Sjkim if (!f) 112250125Sjkim flexerror (_("flex_alloc failed in filter_create_int")); 113228072Sbapt memset (f, 0, sizeof (*f)); 114228072Sbapt f->next = NULL; 115228072Sbapt f->argc = 0; 116228072Sbapt f->argv = NULL; 117228072Sbapt 118228072Sbapt f->filter_func = filter_func; 119228072Sbapt f->extra = extra; 120228072Sbapt 121228072Sbapt if (chain != NULL) { 122228072Sbapt /* append f to end of chain */ 123228072Sbapt while (chain->next) 124228072Sbapt chain = chain->next; 125228072Sbapt chain->next = f; 126228072Sbapt } 127228072Sbapt 128228072Sbapt return f; 129228072Sbapt} 130228072Sbapt 131228072Sbapt/** Fork and exec entire filter chain. 132228072Sbapt * @param chain The head of the chain. 133228072Sbapt * @return true on success. 134228072Sbapt */ 135228072Sbaptbool filter_apply_chain (struct filter * chain) 136228072Sbapt{ 137228072Sbapt int pid, pipes[2]; 138250125Sjkim int r; 139250125Sjkim const int readsz = 512; 140250125Sjkim char *buf; 141228072Sbapt 142250125Sjkim 143228072Sbapt /* Tricky recursion, since we want to begin the chain 144228072Sbapt * at the END. Why? Because we need all the forked processes 145228072Sbapt * to be children of the main flex process. 146228072Sbapt */ 147228072Sbapt if (chain) 148228072Sbapt filter_apply_chain (chain->next); 149228072Sbapt else 150228072Sbapt return true; 151228072Sbapt 152228072Sbapt /* Now we are the right-most unprocessed link in the chain. 
153228072Sbapt */ 154228072Sbapt 155228072Sbapt fflush (stdout); 156228072Sbapt fflush (stderr); 157228072Sbapt 158250125Sjkim 159228072Sbapt if (pipe (pipes) == -1) 160228072Sbapt flexerror (_("pipe failed")); 161228072Sbapt 162228072Sbapt if ((pid = fork ()) == -1) 163228072Sbapt flexerror (_("fork failed")); 164228072Sbapt 165228072Sbapt if (pid == 0) { 166228072Sbapt /* child */ 167228072Sbapt 168228072Sbapt /* We need stdin (the FILE* stdin) to connect to this new pipe. 169228072Sbapt * There is no portable way to set stdin to a new file descriptor, 170228072Sbapt * as stdin is not an lvalue on some systems (BSD). 171228072Sbapt * So we dup the new pipe onto the stdin descriptor and use a no-op fseek 172228072Sbapt * to sync the stream. This is a Hail Mary situation. It seems to work. 173228072Sbapt */ 174228072Sbapt close (pipes[1]); 175250125Sjkimclearerr(stdin); 176228072Sbapt if (dup2 (pipes[0], fileno (stdin)) == -1) 177228072Sbapt flexfatal (_("dup2(pipes[0],0)")); 178228072Sbapt close (pipes[0]); 179228072Sbapt fseek (stdin, 0, SEEK_CUR); 180228072Sbapt 181228072Sbapt /* run as a filter, either internally or by exec */ 182228072Sbapt if (chain->filter_func) { 183228072Sbapt int r; 184228072Sbapt 185228072Sbapt if ((r = chain->filter_func (chain)) == -1) 186228072Sbapt flexfatal (_("filter_func failed")); 187228072Sbapt exit (0); 188228072Sbapt } 189228072Sbapt else { 190228072Sbapt execvp (chain->argv[0], 191228072Sbapt (char **const) (chain->argv)); 192250125Sjkim lerrsf_fatal ( _("exec of %s failed"), 193250125Sjkim chain->argv[0]); 194228072Sbapt } 195228072Sbapt 196228072Sbapt exit (1); 197228072Sbapt } 198228072Sbapt 199228072Sbapt /* Parent */ 200228072Sbapt close (pipes[0]); 201228072Sbapt if (dup2 (pipes[1], fileno (stdout)) == -1) 202228072Sbapt flexfatal (_("dup2(pipes[1],1)")); 203228072Sbapt close (pipes[1]); 204228072Sbapt fseek (stdout, 0, SEEK_CUR); 205228072Sbapt 206228072Sbapt return true; 207228072Sbapt} 208228072Sbapt 
209228072Sbapt/** Truncate the chain to max_len number of filters. 210228072Sbapt * @param chain the current chain. 211228072Sbapt * @param max_len the maximum length of the chain. 212228072Sbapt * @return the resulting length of the chain. 213228072Sbapt */ 214228072Sbaptint filter_truncate (struct filter *chain, int max_len) 215228072Sbapt{ 216228072Sbapt int len = 1; 217228072Sbapt 218228072Sbapt if (!chain) 219228072Sbapt return 0; 220228072Sbapt 221228072Sbapt while (chain->next && len < max_len) { 222228072Sbapt chain = chain->next; 223228072Sbapt ++len; 224228072Sbapt } 225228072Sbapt 226228072Sbapt chain->next = NULL; 227228072Sbapt return len; 228228072Sbapt} 229228072Sbapt 230228072Sbapt/** Splits the chain in order to write to a header file. 231228072Sbapt * Similar in spirit to the 'tee' program. 232228072Sbapt * The header file name is in extra. 233228072Sbapt * @return 0 (zero) on success, and -1 on failure. 234228072Sbapt */ 235228072Sbaptint filter_tee_header (struct filter *chain) 236228072Sbapt{ 237228072Sbapt /* This function reads from stdin and writes to both the C file and the 238228072Sbapt * header file at the same time. 239228072Sbapt */ 240228072Sbapt 241228072Sbapt const int readsz = 512; 242228072Sbapt char *buf; 243228072Sbapt int to_cfd = -1; 244228072Sbapt FILE *to_c = NULL, *to_h = NULL; 245228072Sbapt bool write_header; 246228072Sbapt 247228072Sbapt write_header = (chain->extra != NULL); 248228072Sbapt 249228072Sbapt /* Store a copy of the stdout pipe, which is already piped to C file 250228072Sbapt * through the running chain. Then create a new pipe to the H file as 251228072Sbapt * stdout, and fork the rest of the chain again. 
252228072Sbapt */ 253228072Sbapt 254228072Sbapt if ((to_cfd = dup (1)) == -1) 255228072Sbapt flexfatal (_("dup(1) failed")); 256228072Sbapt to_c = fdopen (to_cfd, "w"); 257228072Sbapt 258228072Sbapt if (write_header) { 259228072Sbapt if (freopen ((char *) chain->extra, "w", stdout) == NULL) 260228072Sbapt flexfatal (_("freopen(headerfilename) failed")); 261228072Sbapt 262228072Sbapt filter_apply_chain (chain->next); 263228072Sbapt to_h = stdout; 264228072Sbapt } 265228072Sbapt 266228072Sbapt /* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch. 267228072Sbapt */ 268228072Sbapt 269228072Sbapt if (write_header) { 270228072Sbapt fputs (check_4_gnu_m4, to_h); 271228072Sbapt fputs ("m4_changecom`'m4_dnl\n", to_h); 272228072Sbapt fputs ("m4_changequote`'m4_dnl\n", to_h); 273228072Sbapt fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h); 274228072Sbapt fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h); 275228072Sbapt fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n", 276228072Sbapt to_h); 277228072Sbapt fprintf (to_h, "#ifndef %sHEADER_H\n", prefix); 278228072Sbapt fprintf (to_h, "#define %sHEADER_H 1\n", prefix); 279228072Sbapt fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix); 280228072Sbapt fprintf (to_h, 281228072Sbapt "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 282228072Sbapt headerfilename ? headerfilename : "<stdout>"); 283228072Sbapt 284228072Sbapt } 285228072Sbapt 286228072Sbapt fputs (check_4_gnu_m4, to_c); 287228072Sbapt fputs ("m4_changecom`'m4_dnl\n", to_c); 288228072Sbapt fputs ("m4_changequote`'m4_dnl\n", to_c); 289228072Sbapt fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c); 290228072Sbapt fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c); 291228072Sbapt fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 292228072Sbapt outfilename ? 
outfilename : "<stdout>"); 293228072Sbapt 294228072Sbapt buf = (char *) flex_alloc (readsz); 295250125Sjkim if (!buf) 296250125Sjkim flexerror (_("flex_alloc failed in filter_tee_header")); 297228072Sbapt while (fgets (buf, readsz, stdin)) { 298228072Sbapt fputs (buf, to_c); 299228072Sbapt if (write_header) 300228072Sbapt fputs (buf, to_h); 301228072Sbapt } 302228072Sbapt 303228072Sbapt if (write_header) { 304228072Sbapt fprintf (to_h, "\n"); 305228072Sbapt 306228072Sbapt /* write a fake line number. It will get fixed by the linedir filter. */ 307228072Sbapt fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n"); 308228072Sbapt 309228072Sbapt fprintf (to_h, "#undef %sIN_HEADER\n", prefix); 310228072Sbapt fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix); 311228072Sbapt fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h); 312228072Sbapt 313228072Sbapt fflush (to_h); 314250125Sjkim if (ferror (to_h)) 315250125Sjkim lerrsf (_("error writing output file %s"), 316250125Sjkim (char *) chain->extra); 317228072Sbapt 318250125Sjkim else if (fclose (to_h)) 319250125Sjkim lerrsf (_("error closing output file %s"), 320250125Sjkim (char *) chain->extra); 321228072Sbapt } 322228072Sbapt 323228072Sbapt fflush (to_c); 324228072Sbapt if (ferror (to_c)) 325228072Sbapt lerrsf (_("error writing output file %s"), 326228072Sbapt outfilename ? outfilename : "<stdout>"); 327228072Sbapt 328228072Sbapt else if (fclose (to_c)) 329228072Sbapt lerrsf (_("error closing output file %s"), 330228072Sbapt outfilename ? outfilename : "<stdout>"); 331228072Sbapt 332228072Sbapt while (wait (0) > 0) ; 333228072Sbapt 334228072Sbapt exit (0); 335228072Sbapt return 0; 336228072Sbapt} 337228072Sbapt 338228072Sbapt/** Adjust the line numbers in the #line directives of the generated scanner. 339228072Sbapt * After the m4 expansion, the line numbers are incorrect since the m4 macros 340228072Sbapt * can add or remove lines. 
This only adjusts line numbers for generated code, 341228072Sbapt * not user code. This also happens to be a good place to squeeze multiple 342228072Sbapt * blank lines into a single blank line. 343228072Sbapt */ 344228072Sbaptint filter_fix_linedirs (struct filter *chain) 345228072Sbapt{ 346228072Sbapt char *buf; 347228072Sbapt const int readsz = 512; 348228072Sbapt int lineno = 1; 349228072Sbapt bool in_gen = true; /* in generated code */ 350228072Sbapt bool last_was_blank = false; 351228072Sbapt 352228072Sbapt if (!chain) 353228072Sbapt return 0; 354228072Sbapt 355228072Sbapt buf = (char *) flex_alloc (readsz); 356250125Sjkim if (!buf) 357250125Sjkim flexerror (_("flex_alloc failed in filter_fix_linedirs")); 358228072Sbapt 359228072Sbapt while (fgets (buf, readsz, stdin)) { 360228072Sbapt 361228072Sbapt regmatch_t m[10]; 362228072Sbapt 363228072Sbapt /* Check for #line directive. */ 364228072Sbapt if (buf[0] == '#' 365250125Sjkim && regexec (®ex_linedir, buf, 3, m, 0) == 0) { 366228072Sbapt 367228072Sbapt int num; 368228072Sbapt char *fname; 369228072Sbapt 370228072Sbapt /* extract the line number and filename */ 371228072Sbapt num = regmatch_strtol (&m[1], buf, NULL, 0); 372228072Sbapt fname = regmatch_dup (&m[2], buf); 373228072Sbapt 374228072Sbapt if (strcmp (fname, 375228072Sbapt outfilename ? outfilename : "<stdout>") 376228072Sbapt == 0 377228072Sbapt || strcmp (fname, 378228072Sbapt headerfilename ? 
headerfilename : "<stdout>") 379228072Sbapt == 0) { 380228072Sbapt 381228072Sbapt char *s1, *s2; 382228072Sbapt char filename[MAXLINE]; 383228072Sbapt 384228072Sbapt s1 = fname; 385228072Sbapt s2 = filename; 386228072Sbapt 387228072Sbapt while ((s2 - filename) < (MAXLINE - 1) && *s1) { 388228072Sbapt /* Escape the backslash */ 389228072Sbapt if (*s1 == '\\') 390228072Sbapt *s2++ = '\\'; 391228072Sbapt /* Escape the double quote */ 392228072Sbapt if (*s1 == '\"') 393228072Sbapt *s2++ = '\\'; 394228072Sbapt /* Copy the character as usual */ 395228072Sbapt *s2++ = *s1++; 396228072Sbapt } 397228072Sbapt 398228072Sbapt *s2 = '\0'; 399228072Sbapt 400228072Sbapt /* Adjust the line directives. */ 401228072Sbapt in_gen = true; 402228072Sbapt snprintf (buf, readsz, "#line %d \"%s\"\n", 403228072Sbapt lineno + 1, filename); 404228072Sbapt } 405228072Sbapt else { 406228072Sbapt /* it's a #line directive for code we didn't write */ 407228072Sbapt in_gen = false; 408228072Sbapt } 409228072Sbapt 410228072Sbapt free (fname); 411228072Sbapt last_was_blank = false; 412228072Sbapt } 413228072Sbapt 414228072Sbapt /* squeeze blank lines from generated code */ 415228072Sbapt else if (in_gen 416228072Sbapt && regexec (®ex_blank_line, buf, 0, NULL, 417228072Sbapt 0) == 0) { 418228072Sbapt if (last_was_blank) 419228072Sbapt continue; 420228072Sbapt else 421228072Sbapt last_was_blank = true; 422228072Sbapt } 423228072Sbapt 424228072Sbapt else { 425228072Sbapt /* it's a line of normal, non-empty code. */ 426228072Sbapt last_was_blank = false; 427228072Sbapt } 428228072Sbapt 429228072Sbapt fputs (buf, stdout); 430228072Sbapt lineno++; 431228072Sbapt } 432228072Sbapt fflush (stdout); 433228072Sbapt if (ferror (stdout)) 434228072Sbapt lerrsf (_("error writing output file %s"), 435228072Sbapt outfilename ? outfilename : "<stdout>"); 436228072Sbapt 437228072Sbapt else if (fclose (stdout)) 438228072Sbapt lerrsf (_("error closing output file %s"), 439228072Sbapt outfilename ? 
outfilename : "<stdout>"); 440228072Sbapt 441228072Sbapt return 0; 442228072Sbapt} 443228072Sbapt 444228072Sbapt/* vim:set expandtab cindent tabstop=4 softtabstop=4 shiftwidth=4 textwidth=0: */ 445