1/* $NetBSD: unifdef.c,v 1.21 2012/02/29 23:35:10 joerg Exp $ */ 2 3/* 4 * Copyright (c) 1985, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Dave Yost. It was rewritten to support ANSI C by Tony Finch. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35/* 36 * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at> 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Dave Yost. It was rewritten to support ANSI C by Tony Finch. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. All advertising materials mentioning features or use of this software 50 * must display the following acknowledgement: 51 * This product includes software developed by the University of 52 * California, Berkeley and its contributors. 53 * 4. Neither the name of the University nor the names of its contributors 54 * may be used to endorse or promote products derived from this software 55 * without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 68 */ 69 70#include <sys/cdefs.h> 71 72#ifndef lint 73#if 0 74static const char copyright[] = 75"@(#) Copyright (c) 1985, 1993\n\ 76 The Regents of the University of California. All rights reserved.\n"; 77#endif 78#ifdef __IDSTRING 79__IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93"); 80__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.21 2012/02/29 23:35:10 joerg Exp $"); 81__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.161 2003/07/01 15:32:48 fanf2 Exp $"); 82#endif 83#endif /* not lint */ 84#ifdef __FBSDID 85__FBSDID("$FreeBSD: src/usr.bin/unifdef/unifdef.c,v 1.18 2003/07/01 15:30:43 fanf Exp $"); 86#endif 87 88/* 89 * unifdef - remove ifdef'ed lines 90 * 91 * Wishlist: 92 * provide an option which will append the name of the 93 * appropriate symbol after #else's and #endif's 94 * provide an option which will check symbols after 95 * #else's and #endif's to see that they match their 96 * corresponding #ifdef or #ifndef 97 * generate #line directives in place of deleted code 98 * 99 * The first two items above require better buffer handling, which would 100 * also make it possible to handle all "dodgy" directives correctly. 101 */ 102 103#include <ctype.h> 104#include <err.h> 105#include <libgen.h> 106#include <stdarg.h> 107#include <stdio.h> 108#include <stdlib.h> 109#include <string.h> 110#include <unistd.h> 111 112#include <sys/param.h> 113#include <sys/stat.h> 114 115#include "stdbool.h" 116 117/* types of input lines: */ 118typedef enum { 119 LT_TRUEI, /* a true #if with ignore flag */ 120 LT_FALSEI, /* a false #if with ignore flag */ 121 LT_IF, /* an unknown #if */ 122 LT_TRUE, /* a true #if */ 123 LT_FALSE, /* a false #if */ 124 LT_ELIF, /* an unknown #elif */ 125 LT_ELTRUE, /* a true #elif */ 126 LT_ELFALSE, /* a false #elif */ 127 LT_ELSE, /* #else */ 128 LT_ENDIF, /* #endif */ 129 LT_DODGY, /* flag: directive is not on one line */ 130 LT_DODGY_LAST = LT_DODGY + LT_ENDIF, 131 LT_PLAIN, /* ordinary line */ 132 LT_EOF, /* end of file */ 133 LT_COUNT 134} Linetype; 135 136static char const * const linetype_name[] = { 137 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE", 138 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF", 139 "DODGY TRUEI", "DODGY FALSEI", 140 "DODGY IF", "DODGY TRUE", "DODGY FALSE", 141 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE", 142 "DODGY ELSE", "DODGY ENDIF", 143 "PLAIN", "EOF" 144}; 145 146/* state of #if processing */ 147typedef enum { 148 IS_OUTSIDE, 149 IS_FALSE_PREFIX, /* false #if followed by false #elifs */ 150 IS_TRUE_PREFIX, /* first non-false #(el)if is true */ 151 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */ 152 IS_FALSE_MIDDLE, /* a false #elif after a pass state */ 153 IS_TRUE_MIDDLE, /* a true #elif after a pass state */ 154 IS_PASS_ELSE, /* an else after a pass state */ 155 IS_FALSE_ELSE, /* an else after a true state */ 156 IS_TRUE_ELSE, /* an else after only false states */ 157 IS_FALSE_TRAILER, /* #elifs after a true are false */ 158 IS_COUNT 159} Ifstate; 160 161static char const * const ifstate_name[] = { 162 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX", 163 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE", 164 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE", 165 "FALSE_TRAILER" 166}; 167 168/* state of comment parser */ 169typedef enum { 170 NO_COMMENT = false, /* outside a comment */ 171 C_COMMENT, /* in a comment like this one */ 172 CXX_COMMENT, /* between // and end of line */ 173 STARTING_COMMENT, /* just after slash-backslash-newline */ 174 FINISHING_COMMENT /* star-backslash-newline in a C comment */ 175} Comment_state; 176 177static char const * const comment_name[] = { 178 "NO", "C", "CXX", "STARTING", "FINISHING" 179}; 180 181/* state of preprocessor line parser */ 182typedef enum { 183 LS_START, /* only space and comments on this line */ 184 LS_HASH, /* only space, comments, and a hash */ 185 LS_DIRTY /* this line can't be a preprocessor line */ 186} Line_state; 187 188static char const * const linestate_name[] = { 189 "START", "HASH", "DIRTY" 190}; 191 192/* 193 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1 194 */ 195#define MAXDEPTH 64 /* maximum #if nesting */ 196#define MAXLINE 4096 /* maximum length of line */ 197#define MAXSYMS 4096 /* maximum number of symbols */ 198 199/* 200 * Sometimes when editing a keyword the replacement text is longer, so 201 * we leave some space at the end of the tline buffer to accommodate this. 202 */ 203#define EDITSLOP 10 204 205/* 206 * Globals. 207 */ 208 209static bool complement; /* -c: do the complement */ 210static bool debugging; /* -d: debugging reports */ 211static bool iocccok; /* -e: fewer IOCCC errors */ 212static bool killconsts; /* -k: eval constant #ifs */ 213static bool lnblank; /* -l: blank deleted lines */ 214static bool symlist; /* -s: output symbol list */ 215static bool text; /* -t: this is a text file */ 216 217static const char *symname[MAXSYMS]; /* symbol name */ 218static const char *value[MAXSYMS]; /* -Dsym=value */ 219static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ 220static int nsyms; /* number of symbols */ 221 222static FILE *input; /* input file pointer */ 223static FILE *output; /* output file pointer */ 224static const char *filename; /* input file name */ 225static char *ofilename; /* output file name */ 226static char tmpname[MAXPATHLEN]; /* used when overwriting */ 227static int linenum; /* current line number */ 228static int overwriting; /* output overwrites input */ 229 230static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ 231static char *keyword; /* used for editing #elif's */ 232 233static Comment_state incomment; /* comment parser state */ 234static Line_state linestate; /* #if line parser state */ 235static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ 236static bool ignoring[MAXDEPTH]; /* ignore comments state */ 237static int stifline[MAXDEPTH]; /* start of current #if */ 238static int depth; /* current #if nesting */ 239static bool keepthis; /* don't delete constant #if */ 240 241static int exitstat; /* program exit status */ 242 243static void addsym(bool, bool, char *); 244static void debug(const char *, ...) __printflike(1, 2); 245__dead static void done(void); 246__dead static void error(const char *); 247static int findsym(const char *); 248static void flushline(bool); 249static Linetype get_line(void); 250static Linetype ifeval(const char **); 251static void ignoreoff(void); 252static void ignoreon(void); 253static void keywordedit(const char *); 254static void nest(void); 255__dead static void process(void); 256static const char *skipcomment(const char *); 257static const char *skipsym(const char *); 258static void state(Ifstate); 259static int strlcmp(const char *, const char *, size_t); 260__dead static void usage(void); 261 262#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_') 263 264/* 265 * The main program. 266 */ 267int 268main(int argc, char *argv[]) 269{ 270 int opt; 271 struct stat isb, osb; 272 273 while ((opt = getopt(argc, argv, "i:D:U:I:o:cdeklst")) != -1) 274 switch (opt) { 275 case 'i': /* treat stuff controlled by these symbols as text */ 276 /* 277 * For strict backwards-compatibility the U or D 278 * should be immediately after the -i but it doesn't 279 * matter much if we relax that requirement. 280 */ 281 opt = *optarg++; 282 if (opt == 'D') 283 addsym(true, true, optarg); 284 else if (opt == 'U') 285 addsym(true, false, optarg); 286 else 287 usage(); 288 break; 289 case 'D': /* define a symbol */ 290 addsym(false, true, optarg); 291 break; 292 case 'U': /* undef a symbol */ 293 addsym(false, false, optarg); 294 break; 295 case 'I': 296 /* no-op for compatibility with cpp */ 297 break; 298 case 'c': /* treat -D as -U and vice versa */ 299 complement = true; 300 break; 301 case 'd': 302 debugging = true; 303 break; 304 case 'e': /* fewer errors from dodgy lines */ 305 iocccok = true; 306 break; 307 case 'k': /* process constant #ifs */ 308 killconsts = true; 309 break; 310 case 'l': /* blank deleted lines instead of omitting them */ 311 lnblank = true; 312 break; 313 case 'o': /* output to a file */ 314 ofilename = optarg; 315 break; 316 case 's': /* only output list of symbols that control #ifs */ 317 symlist = true; 318 break; 319 case 't': /* don't parse C comments */ 320 text = true; 321 break; 322 default: 323 usage(); 324 } 325 argc -= optind; 326 argv += optind; 327 if (nsyms == 0 && !symlist) { 328 warnx("must -D or -U at least one symbol"); 329 usage(); 330 } 331 if (argc > 1) { 332 errx(2, "can only do one file"); 333 } else if (argc == 1 && strcmp(*argv, "-") != 0) { 334 filename = *argv; 335 input = fopen(filename, "r"); 336 if (input == NULL) 337 err(2, "can't open %s", filename); 338 } else { 339 filename = "[stdin]"; 340 input = stdin; 341 } 342 if (ofilename == NULL) { 343 output = stdout; 344 } else { 345 if (stat(ofilename, &osb) == 0) { 346 if (fstat(fileno(input), &isb) != 0) 347 err(2, "can't fstat %s", filename); 348 349 overwriting = (osb.st_dev == isb.st_dev && 350 osb.st_ino == isb.st_ino); 351 } 352 if (overwriting) { 353 int ofd; 354 355 snprintf(tmpname, sizeof(tmpname), "%s/unifdef.XXXXXX", 356 dirname(ofilename)); 357 if ((ofd = mkstemp(tmpname)) != -1) 358 output = fdopen(ofd, "w+"); 359 if (output == NULL) 360 err(2, "can't create temporary file"); 361 fchmod(ofd, isb.st_mode & ACCESSPERMS); 362 } else { 363 output = fopen(ofilename, "w"); 364 if (output == NULL) 365 err(2, "can't open %s", ofilename); 366 } 367 } 368 process(); 369 abort(); /* bug */ 370} 371 372static void 373usage(void) 374{ 375 fprintf(stderr, "usage: unifdef [-cdeklst] [-o output]" 376 " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n"); 377 exit(2); 378} 379 380/* 381 * A state transition function alters the global #if processing state 382 * in a particular way. The table below is indexed by the current 383 * processing state and the type of the current line. 384 * 385 * Nesting is handled by keeping a stack of states; some transition 386 * functions increase or decrease the depth. They also maintain the 387 * ignore state on a stack. In some complicated cases they have to 388 * alter the preprocessor directive, as follows. 389 * 390 * When we have processed a group that starts off with a known-false 391 * #if/#elif sequence (which has therefore been deleted) followed by a 392 * #elif that we don't understand and therefore must keep, we edit the 393 * latter into a #if to keep the nesting correct. 394 * 395 * When we find a true #elif in a group, the following block will 396 * always be kept and the rest of the sequence after the next #elif or 397 * #else will be discarded. We edit the #elif into a #else and the 398 * following directive to #endif since this has the desired behaviour. 399 * 400 * "Dodgy" directives are split across multiple lines, the most common 401 * example being a multi-line comment hanging off the right of the 402 * directive. We can handle them correctly only if there is no change 403 * from printing to dropping (or vice versa) caused by that directive. 404 * If the directive is the first of a group we have a choice between 405 * failing with an error, or passing it through unchanged instead of 406 * evaluating it. The latter is not the default to avoid questions from 407 * users about unifdef unexpectedly leaving behind preprocessor directives. 408 */ 409typedef void state_fn(void); 410 411/* report an error */ 412__dead static void Eelif (void) { error("Inappropriate #elif"); } 413__dead static void Eelse (void) { error("Inappropriate #else"); } 414__dead static void Eendif(void) { error("Inappropriate #endif"); } 415__dead static void Eeof (void) { error("Premature EOF"); } 416__dead static void Eioccc(void) { error("Obfuscated preprocessor control line"); } 417/* plain line handling */ 418static void print (void) { flushline(true); } 419static void drop (void) { flushline(false); } 420/* output lacks group's start line */ 421static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); } 422static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); } 423static void Selse (void) { drop(); state(IS_TRUE_ELSE); } 424/* print/pass this block */ 425static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); } 426static void Pelse (void) { print(); state(IS_PASS_ELSE); } 427static void Pendif(void) { print(); --depth; } 428/* discard this block */ 429static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); } 430static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); } 431static void Delse (void) { drop(); state(IS_FALSE_ELSE); } 432static void Dendif(void) { drop(); --depth; } 433/* first line of group */ 434static void Fdrop (void) { nest(); Dfalse(); } 435static void Fpass (void) { nest(); Pelif(); } 436static void Ftrue (void) { nest(); Strue(); } 437static void Ffalse(void) { nest(); Sfalse(); } 438/* variable pedantry for obfuscated lines */ 439static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); } 440static void Oif (void) { if (iocccok) Fpass(); else Eioccc(); } 441static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); } 442/* ignore comments in this block */ 443static void Idrop (void) { Fdrop(); ignoreon(); } 444static void Itrue (void) { Ftrue(); ignoreon(); } 445static void Ifalse(void) { Ffalse(); ignoreon(); } 446/* edit this line */ 447static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } 448static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); } 449static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); } 450static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); } 451 452static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { 453/* IS_OUTSIDE */ 454{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, 455 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, 456 print, done }, 457/* IS_FALSE_PREFIX */ 458{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, 459 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, 460 drop, Eeof }, 461/* IS_TRUE_PREFIX */ 462{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, 463 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 464 print, Eeof }, 465/* IS_PASS_MIDDLE */ 466{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, 467 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, 468 print, Eeof }, 469/* IS_FALSE_MIDDLE */ 470{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, 471 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 472 drop, Eeof }, 473/* IS_TRUE_MIDDLE */ 474{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, 475 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, 476 print, Eeof }, 477/* IS_PASS_ELSE */ 478{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, 479 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, 480 print, Eeof }, 481/* IS_FALSE_ELSE */ 482{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, 483 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, 484 drop, Eeof }, 485/* IS_TRUE_ELSE */ 486{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, 487 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, 488 print, Eeof }, 489/* IS_FALSE_TRAILER */ 490{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, 491 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, 492 drop, Eeof } 493/*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF 494 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) 495 PLAIN EOF */ 496}; 497 498/* 499 * State machine utility functions 500 */ 501static void 502done(void) 503{ 504 if (incomment) 505 error("EOF in comment"); 506 if (fclose(output)) { 507 if (overwriting) { 508 unlink(tmpname); 509 errx(2, "%s unchanged", ofilename); 510 } 511 } 512 if (overwriting && rename(tmpname, ofilename)) { 513 unlink(tmpname); 514 errx(2, "%s unchanged", ofilename); 515 } 516 exit(exitstat); 517} 518static void 519ignoreoff(void) 520{ 521 ignoring[depth] = ignoring[depth-1]; 522} 523static void 524ignoreon(void) 525{ 526 ignoring[depth] = true; 527} 528static void 529keywordedit(const char *replacement) 530{ 531 strlcpy(keyword, replacement, tline + sizeof(tline) - keyword); 532 print(); 533} 534static void 535nest(void) 536{ 537 depth += 1; 538 if (depth >= MAXDEPTH) 539 error("Too many levels of nesting"); 540 stifline[depth] = linenum; 541} 542static void 543state(Ifstate is) 544{ 545 ifstate[depth] = is; 546} 547 548/* 549 * Write a line to the output or not, according to command line options. 550 */ 551static void 552flushline(bool keep) 553{ 554 if (symlist) 555 return; 556 if (keep ^ complement) 557 fputs(tline, output); 558 else { 559 if (lnblank) 560 putc('\n', output); 561 exitstat = 1; 562 } 563} 564 565/* 566 * The driver for the state machine. 567 */ 568static void 569process(void) 570{ 571 Linetype lineval; 572 573 for (;;) { 574 linenum++; 575 lineval = get_line(); 576 trans_table[ifstate[depth]][lineval](); 577 debug("process %s -> %s depth %d", 578 linetype_name[lineval], 579 ifstate_name[ifstate[depth]], depth); 580 } 581} 582 583/* 584 * Parse a line and determine its type. We keep the preprocessor line 585 * parser state between calls in the global variable linestate, with 586 * help from skipcomment(). 587 */ 588static Linetype 589get_line(void) 590{ 591 const char *cp; 592 int cursym; 593 int kwlen; 594 Linetype retval; 595 Comment_state wascomment; 596 597 if (fgets(tline, MAXLINE, input) == NULL) 598 return (LT_EOF); 599 retval = LT_PLAIN; 600 wascomment = incomment; 601 cp = skipcomment(tline); 602 if (linestate == LS_START) { 603 if (*cp == '#') { 604 linestate = LS_HASH; 605 cp = skipcomment(cp + 1); 606 } else if (*cp != '\0') 607 linestate = LS_DIRTY; 608 } 609 if (!incomment && linestate == LS_HASH) { 610 keyword = tline + (cp - tline); 611 cp = skipsym(cp); 612 kwlen = cp - keyword; 613 /* no way can we deal with a continuation inside a keyword */ 614 if (strncmp(cp, "\\\n", 2) == 0) 615 Eioccc(); 616 if (strlcmp("ifdef", keyword, kwlen) == 0 || 617 strlcmp("ifndef", keyword, kwlen) == 0) { 618 cp = skipcomment(cp); 619 if ((cursym = findsym(cp)) < 0) 620 retval = LT_IF; 621 else { 622 retval = (keyword[2] == 'n') 623 ? LT_FALSE : LT_TRUE; 624 if (value[cursym] == NULL) 625 retval = (retval == LT_TRUE) 626 ? LT_FALSE : LT_TRUE; 627 if (ignore[cursym]) 628 retval = (retval == LT_TRUE) 629 ? LT_TRUEI : LT_FALSEI; 630 } 631 cp = skipsym(cp); 632 } else if (strlcmp("if", keyword, kwlen) == 0) 633 retval = ifeval(&cp); 634 else if (strlcmp("elif", keyword, kwlen) == 0) 635 retval = ifeval(&cp) - LT_IF + LT_ELIF; 636 else if (strlcmp("else", keyword, kwlen) == 0) 637 retval = LT_ELSE; 638 else if (strlcmp("endif", keyword, kwlen) == 0) 639 retval = LT_ENDIF; 640 else { 641 linestate = LS_DIRTY; 642 retval = LT_PLAIN; 643 } 644 cp = skipcomment(cp); 645 if (*cp != '\0') { 646 linestate = LS_DIRTY; 647 if (retval == LT_TRUE || retval == LT_FALSE || 648 retval == LT_TRUEI || retval == LT_FALSEI) 649 retval = LT_IF; 650 if (retval == LT_ELTRUE || retval == LT_ELFALSE) 651 retval = LT_ELIF; 652 } 653 if (retval != LT_PLAIN && (wascomment || incomment)) { 654 retval += LT_DODGY; 655 if (incomment) 656 linestate = LS_DIRTY; 657 } 658 } 659 if (linestate == LS_DIRTY) { 660 while (*cp != '\0') 661 cp = skipcomment(cp + 1); 662 } 663 debug("parser %s comment %s line", 664 comment_name[incomment], linestate_name[linestate]); 665 return (retval); 666} 667 668/* 669 * These are the binary operators that are supported by the expression 670 * evaluator. Note that if support for division is added then we also 671 * need short-circuiting booleans because of divide-by-zero. 672 */ 673static int op_lt(int a, int b) { return (a < b); } 674static int op_gt(int a, int b) { return (a > b); } 675static int op_le(int a, int b) { return (a <= b); } 676static int op_ge(int a, int b) { return (a >= b); } 677static int op_eq(int a, int b) { return (a == b); } 678static int op_ne(int a, int b) { return (a != b); } 679static int op_or(int a, int b) { return (a || b); } 680static int op_and(int a, int b) { return (a && b); } 681 682/* 683 * An evaluation function takes three arguments, as follows: (1) a pointer to 684 * an element of the precedence table which lists the operators at the current 685 * level of precedence; (2) a pointer to an integer which will receive the 686 * value of the expression; and (3) a pointer to a char* that points to the 687 * expression to be evaluated and that is updated to the end of the expression 688 * when evaluation is complete. The function returns LT_FALSE if the value of 689 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the 690 * expression could not be evaluated. 691 */ 692struct ops; 693 694typedef Linetype eval_fn(const struct ops *, int *, const char **); 695 696static eval_fn eval_table, eval_unary; 697 698/* 699 * The precedence table. Expressions involving binary operators are evaluated 700 * in a table-driven way by eval_table. When it evaluates a subexpression it 701 * calls the inner function with its first argument pointing to the next 702 * element of the table. Innermost expressions have special non-table-driven 703 * handling. 704 */ 705static const struct ops { 706 eval_fn *inner; 707 struct op { 708 const char *str; 709 int (*fn)(int, int); 710 } op[5]; 711} eval_ops[] = { 712 { eval_table, { { "||", op_or } } }, 713 { eval_table, { { "&&", op_and } } }, 714 { eval_table, { { "==", op_eq }, 715 { "!=", op_ne } } }, 716 { eval_unary, { { "<=", op_le }, 717 { ">=", op_ge }, 718 { "<", op_lt }, 719 { ">", op_gt } } } 720}; 721 722/* 723 * Function for evaluating the innermost parts of expressions, 724 * viz. !expr (expr) defined(symbol) symbol number 725 * We reset the keepthis flag when we find a non-constant subexpression. 726 */ 727static Linetype 728eval_unary(const struct ops *ops, int *valp, const char **cpp) 729{ 730 const char *cp; 731 char *ep; 732 int sym; 733 734 cp = skipcomment(*cpp); 735 if (*cp == '!') { 736 debug("eval%td !", ops - eval_ops); 737 cp++; 738 if (eval_unary(ops, valp, &cp) == LT_IF) 739 return (LT_IF); 740 *valp = !*valp; 741 } else if (*cp == '(') { 742 cp++; 743 debug("eval%td (", ops - eval_ops); 744 if (eval_table(eval_ops, valp, &cp) == LT_IF) 745 return (LT_IF); 746 cp = skipcomment(cp); 747 if (*cp++ != ')') 748 return (LT_IF); 749 } else if (isdigit((unsigned char)*cp)) { 750 debug("eval%td number", ops - eval_ops); 751 *valp = strtol(cp, &ep, 0); 752 cp = skipsym(cp); 753 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { 754 cp = skipcomment(cp+7); 755 debug("eval%td defined", ops - eval_ops); 756 if (*cp++ != '(') 757 return (LT_IF); 758 cp = skipcomment(cp); 759 sym = findsym(cp); 760 if (sym < 0 || symlist) 761 return (LT_IF); 762 *valp = (value[sym] != NULL); 763 cp = skipsym(cp); 764 cp = skipcomment(cp); 765 if (*cp++ != ')') 766 return (LT_IF); 767 keepthis = false; 768 } else if (!endsym(*cp)) { 769 debug("eval%td symbol", ops - eval_ops); 770 sym = findsym(cp); 771 if (sym < 0 || symlist) 772 return (LT_IF); 773 if (value[sym] == NULL) 774 *valp = 0; 775 else { 776 *valp = strtol(value[sym], &ep, 0); 777 if (*ep != '\0' || ep == value[sym]) 778 return (LT_IF); 779 } 780 cp = skipsym(cp); 781 keepthis = false; 782 } else { 783 debug("eval%td bad expr", ops - eval_ops); 784 return (LT_IF); 785 } 786 787 *cpp = cp; 788 debug("eval%td = %d", ops - eval_ops, *valp); 789 return (*valp ? LT_TRUE : LT_FALSE); 790} 791 792/* 793 * Table-driven evaluation of binary operators. 794 */ 795static Linetype 796eval_table(const struct ops *ops, int *valp, const char **cpp) 797{ 798 const struct op *op; 799 const char *cp; 800 int val; 801 802 debug("eval%td", ops - eval_ops); 803 cp = *cpp; 804 if (ops->inner(ops+1, valp, &cp) == LT_IF) 805 return (LT_IF); 806 for (;;) { 807 cp = skipcomment(cp); 808 for (op = ops->op; op->str != NULL; op++) 809 if (strncmp(cp, op->str, strlen(op->str)) == 0) 810 break; 811 if (op->str == NULL) 812 break; 813 cp += strlen(op->str); 814 debug("eval%td %s", ops - eval_ops, op->str); 815 if (ops->inner(ops+1, &val, &cp) == LT_IF) 816 return (LT_IF); 817 *valp = op->fn(*valp, val); 818 } 819 820 *cpp = cp; 821 debug("eval%td = %d", ops - eval_ops, *valp); 822 return (*valp ? LT_TRUE : LT_FALSE); 823} 824 825/* 826 * Evaluate the expression on a #if or #elif line. If we can work out 827 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we 828 * return just a generic LT_IF. 829 */ 830static Linetype 831ifeval(const char **cpp) 832{ 833 int ret; 834 int val; 835 836 debug("eval %s", *cpp); 837 keepthis = killconsts ? false : true; 838 ret = eval_table(eval_ops, &val, cpp); 839 debug("eval = %d", val); 840 return (keepthis ? LT_IF : ret); 841} 842 843/* 844 * Skip over comments and stop at the next character position that is 845 * not whitespace. Between calls we keep the comment state in the 846 * global variable incomment, and we also adjust the global variable 847 * linestate when we see a newline. 848 * XXX: doesn't cope with the buffer splitting inside a state transition. 849 */ 850static const char * 851skipcomment(const char *cp) 852{ 853 if (text || ignoring[depth]) { 854 for (; isspace((unsigned char)*cp); cp++) 855 if (*cp == '\n') 856 linestate = LS_START; 857 return (cp); 858 } 859 while (*cp != '\0') 860 /* don't reset to LS_START after a line continuation */ 861 if (strncmp(cp, "\\\n", 2) == 0) 862 cp += 2; 863 else switch (incomment) { 864 case NO_COMMENT: 865 if (strncmp(cp, "/\\\n", 3) == 0) { 866 incomment = STARTING_COMMENT; 867 cp += 3; 868 } else if (strncmp(cp, "/*", 2) == 0) { 869 incomment = C_COMMENT; 870 cp += 2; 871 } else if (strncmp(cp, "//", 2) == 0) { 872 incomment = CXX_COMMENT; 873 cp += 2; 874 } else if (strncmp(cp, "\n", 1) == 0) { 875 linestate = LS_START; 876 cp += 1; 877 } else if (strchr(" \t", *cp) != NULL) { 878 cp += 1; 879 } else 880 return (cp); 881 continue; 882 case CXX_COMMENT: 883 if (strncmp(cp, "\n", 1) == 0) { 884 incomment = NO_COMMENT; 885 linestate = LS_START; 886 } 887 cp += 1; 888 continue; 889 case C_COMMENT: 890 if (strncmp(cp, "*\\\n", 3) == 0) { 891 incomment = FINISHING_COMMENT; 892 cp += 3; 893 } else if (strncmp(cp, "*/", 2) == 0) { 894 incomment = NO_COMMENT; 895 cp += 2; 896 } else 897 cp += 1; 898 continue; 899 case STARTING_COMMENT: 900 if (*cp == '*') { 901 incomment = C_COMMENT; 902 cp += 1; 903 } else if (*cp == '/') { 904 incomment = CXX_COMMENT; 905 cp += 1; 906 } else { 907 incomment = NO_COMMENT; 908 linestate = LS_DIRTY; 909 } 910 continue; 911 case FINISHING_COMMENT: 912 if (*cp == '/') { 913 incomment = NO_COMMENT; 914 cp += 1; 915 } else 916 incomment = C_COMMENT; 917 continue; 918 default: 919 abort(); /* bug */ 920 } 921 return (cp); 922} 923 924/* 925 * Skip over an identifier. 926 */ 927static const char * 928skipsym(const char *cp) 929{ 930 while (!endsym(*cp)) 931 ++cp; 932 return (cp); 933} 934 935/* 936 * Look for the symbol in the symbol table. If it is found, we return 937 * the symbol table index, else we return -1. 938 */ 939static int 940findsym(const char *str) 941{ 942 const char *cp; 943 int symind; 944 945 cp = skipsym(str); 946 if (cp == str) 947 return (-1); 948 if (symlist) 949 printf("%.*s\n", (int)(cp-str), str); 950 for (symind = 0; symind < nsyms; ++symind) { 951 if (strlcmp(symname[symind], str, cp-str) == 0) { 952 debug("findsym %s %s", symname[symind], 953 value[symind] ? value[symind] : ""); 954 return (symind); 955 } 956 } 957 return (-1); 958} 959 960/* 961 * Add a symbol to the symbol table. 962 */ 963static void 964addsym(bool ignorethis, bool definethis, char *sym) 965{ 966 int symind; 967 char *val; 968 969 symind = findsym(sym); 970 if (symind < 0) { 971 if (nsyms >= MAXSYMS) 972 errx(2, "too many symbols"); 973 symind = nsyms++; 974 } 975 symname[symind] = sym; 976 ignore[symind] = ignorethis; 977 val = sym + (skipsym(sym) - sym); 978 if (definethis) { 979 if (*val == '=') { 980 value[symind] = val+1; 981 *val = '\0'; 982 } else if (*val == '\0') 983 value[symind] = ""; 984 else 985 usage(); 986 } else { 987 if (*val != '\0') 988 usage(); 989 value[symind] = NULL; 990 } 991} 992 993/* 994 * Compare s with n characters of t. 995 * The same as strncmp() except that it checks that s[n] == '\0'. 996 */ 997static int 998strlcmp(const char *s, const char *t, size_t n) 999{ 1000 while (n-- && *t != '\0') 1001 if (*s != *t) 1002 return ((unsigned char)*s - (unsigned char)*t); 1003 else 1004 ++s, ++t; 1005 return ((unsigned char)*s); 1006} 1007 1008/* 1009 * Diagnostics. 1010 */ 1011static void 1012debug(const char *msg, ...) 1013{ 1014 va_list ap; 1015 1016 if (debugging) { 1017 va_start(ap, msg); 1018 vwarnx(msg, ap); 1019 va_end(ap); 1020 } 1021} 1022 1023static void 1024error(const char *msg) 1025{ 1026 if (depth == 0) 1027 warnx("%s: %d: %s", filename, linenum, msg); 1028 else 1029 warnx("%s: %d: %s (#if line %d depth %d)", 1030 filename, linenum, msg, stifline[depth], depth); 1031 fclose(output); 1032 if (overwriting) { 1033 unlink(tmpname); 1034 errx(2, "%s unchanged", ofilename); 1035 } 1036 errx(2, "output may be truncated"); 1037} 1038