/* unifdef.c revision 117076 */
/*
 * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
 * Copyright (c) 1985, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>

#ifndef lint
#if 0
static const char copyright[] =
"@(#) Copyright (c) 1985, 1993\n\
 The Regents of the University of California. All rights reserved.\n";
#endif
#ifdef __IDSTRING
__IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93");
__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $");
__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.156 2003/06/30 14:30:54 fanf2 Exp $");
#endif
#endif /* not lint */
#ifdef __FBSDID
__FBSDID("$FreeBSD: head/usr.bin/unifdef/unifdef.c 117076 2003-06-30 14:46:25Z fanf $");
#endif

/*
 * unifdef - remove ifdef'ed lines
 *
 * Wishlist:
 *	provide an option which will append the name of the
 *	  appropriate symbol after #else's and #endif's
 *	provide an option which will check symbols after
 *	  #else's and #endif's to see that they match their
 *	  corresponding #ifdef or #ifndef
 *	generate #line directives in place of deleted code
 *
 * The first two items above require better buffer handling, which would
 * also make it possible to handle all "dodgy" directives correctly.
69 */ 70 71#include <ctype.h> 72#include <err.h> 73#include <stdarg.h> 74#include <stdbool.h> 75#include <stdio.h> 76#include <stdlib.h> 77#include <string.h> 78#include <unistd.h> 79 80/* types of input lines: */ 81typedef enum { 82 LT_TRUEI, /* a true #if with ignore flag */ 83 LT_FALSEI, /* a false #if with ignore flag */ 84 LT_IF, /* an unknown #if */ 85 LT_TRUE, /* a true #if */ 86 LT_FALSE, /* a false #if */ 87 LT_ELIF, /* an unknown #elif */ 88 LT_ELTRUE, /* a true #elif */ 89 LT_ELFALSE, /* a false #elif */ 90 LT_ELSE, /* #else */ 91 LT_ENDIF, /* #endif */ 92 LT_DODGY, /* flag: directive is not on one line */ 93 LT_DODGY_LAST = LT_DODGY + LT_ENDIF, 94 LT_PLAIN, /* ordinary line */ 95 LT_EOF, /* end of file */ 96 LT_COUNT 97} Linetype; 98 99static char const * const linetype_name[] = { 100 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE", 101 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF", 102 "DODGY TRUEI", "DODGY FALSEI", 103 "DODGY IF", "DODGY TRUE", "DODGY FALSE", 104 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE", 105 "DODGY ELSE", "DODGY ENDIF", 106 "PLAIN", "EOF" 107}; 108 109/* state of #if processing */ 110typedef enum { 111 IS_OUTSIDE, 112 IS_FALSE_PREFIX, /* false #if followed by false #elifs */ 113 IS_TRUE_PREFIX, /* first non-false #(el)if is true */ 114 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */ 115 IS_FALSE_MIDDLE, /* a false #elif after a pass state */ 116 IS_TRUE_MIDDLE, /* a true #elif after a pass state */ 117 IS_PASS_ELSE, /* an else after a pass state */ 118 IS_FALSE_ELSE, /* an else after a true state */ 119 IS_TRUE_ELSE, /* an else after only false states */ 120 IS_FALSE_TRAILER, /* #elifs after a true are false */ 121 IS_COUNT 122} Ifstate; 123 124static char const * const ifstate_name[] = { 125 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX", 126 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE", 127 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE", 128 "FALSE_TRAILER" 129}; 130 131/* state of comment parser */ 132typedef enum { 133 NO_COMMENT = false, 
/* outside a comment */ 134 C_COMMENT, /* in a comment like this one */ 135 CXX_COMMENT, /* between // and end of line */ 136 STARTING_COMMENT, /* just after slash-backslash-newline */ 137 FINISHING_COMMENT /* star-backslash-newline in a C comment */ 138} Comment_state; 139 140static char const * const comment_name[] = { 141 "NO", "C", "CXX", "STARTING", "FINISHING" 142}; 143 144/* state of preprocessor line parser */ 145typedef enum { 146 LS_START, /* only space and comments on this line */ 147 LS_HASH, /* only space, comments, and a hash */ 148 LS_DIRTY /* this line can't be a preprocessor line */ 149} Line_state; 150 151static char const * const linestate_name[] = { 152 "START", "HASH", "DIRTY" 153}; 154 155/* 156 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1 157 */ 158#define MAXDEPTH 64 /* maximum #if nesting */ 159#define MAXLINE 4096 /* maximum length of line */ 160#define MAXSYMS 4096 /* maximum number of symbols */ 161 162/* 163 * Sometimes when editing a keyword the replacement text is longer, so 164 * we leave some space at the end of the tline buffer to accommodate this. 165 */ 166#define EDITSLOP 10 167 168/* 169 * Globals. 
170 */ 171 172static bool complement; /* -c: do the complement */ 173static bool debugging; /* -d: debugging reports */ 174static bool iocccok; /* -e: fewer IOCCC errors */ 175static bool killconsts; /* -k: eval constant #ifs */ 176static bool lnblank; /* -l: blank deleted lines */ 177static bool symlist; /* -s: output symbol list */ 178static bool text; /* -t: this is a text file */ 179 180static const char *symname[MAXSYMS]; /* symbol name */ 181static const char *value[MAXSYMS]; /* -Dsym=value */ 182static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ 183static int nsyms; /* number of symbols */ 184 185static FILE *input; /* input file pointer */ 186static const char *filename; /* input file name */ 187static int linenum; /* current line number */ 188 189static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ 190static char *keyword; /* used for editing #elif's */ 191 192static Comment_state incomment; /* comment parser state */ 193static Line_state linestate; /* #if line parser state */ 194static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ 195static bool ignoring[MAXDEPTH]; /* ignore comments state */ 196static int stifline[MAXDEPTH]; /* start of current #if */ 197static int depth; /* current #if nesting */ 198static bool keepthis; /* don't delete constant #if */ 199 200static int exitstat; /* program exit status */ 201 202static void addsym(bool, bool, char *); 203static void debug(const char *, ...); 204static void error(const char *); 205static int findsym(const char *); 206static void flushline(bool); 207static Linetype getline(void); 208static Linetype ifeval(const char **); 209static void ignoreoff(void); 210static void ignoreon(void); 211static void keywordedit(const char *); 212static void nest(void); 213static void process(void); 214static const char *skipcomment(const char *); 215static const char *skipsym(const char *); 216static void state(Ifstate); 217static int strlcmp(const char *, const char *, size_t); 218static void 
usage(void); 219 220#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_') 221 222/* 223 * The main program. 224 */ 225int 226main(int argc, char *argv[]) 227{ 228 int opt; 229 230 while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1) 231 switch (opt) { 232 case 'i': /* treat stuff controlled by these symbols as text */ 233 /* 234 * For strict backwards-compatibility the U or D 235 * should be immediately after the -i but it doesn't 236 * matter much if we relax that requirement. 237 */ 238 opt = *optarg++; 239 if (opt == 'D') 240 addsym(true, true, optarg); 241 else if (opt == 'U') 242 addsym(true, false, optarg); 243 else 244 usage(); 245 break; 246 case 'D': /* define a symbol */ 247 addsym(false, true, optarg); 248 break; 249 case 'U': /* undef a symbol */ 250 addsym(false, false, optarg); 251 break; 252 case 'I': 253 /* no-op for compatibility with cpp */ 254 break; 255 case 'c': /* treat -D as -U and vice versa */ 256 complement = true; 257 break; 258 case 'd': 259 debugging = true; 260 break; 261 case 'e': /* fewer errors from dodgy lines */ 262 iocccok = true; 263 break; 264 case 'k': /* process constant #ifs */ 265 killconsts = true; 266 break; 267 case 'l': /* blank deleted lines instead of omitting them */ 268 lnblank = true; 269 break; 270 case 's': /* only output list of symbols that control #ifs */ 271 symlist = true; 272 break; 273 case 't': /* don't parse C comments */ 274 text = true; 275 break; 276 default: 277 usage(); 278 } 279 argc -= optind; 280 argv += optind; 281 if (nsyms == 0 && !symlist) { 282 warnx("must -D or -U at least one symbol"); 283 usage(); 284 } 285 if (argc > 1) { 286 errx(2, "can only do one file"); 287 } else if (argc == 1 && strcmp(*argv, "-") != 0) { 288 filename = *argv; 289 if ((input = fopen(filename, "r")) != NULL) { 290 process(); 291 (void) fclose(input); 292 } else 293 err(2, "can't open %s", *argv); 294 } else { 295 filename = "[stdin]"; 296 input = stdin; 297 process(); 298 } 
299 300 exit(exitstat); 301} 302 303static void 304usage(void) 305{ 306 fprintf(stderr, "usage: unifdef [-cdeklst]" 307 " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n"); 308 exit(2); 309} 310 311/* 312 * A state transition function alters the global #if processing state 313 * in a particular way. The table below is indexed by the current 314 * processing state and the type of the current line. A NULL entry 315 * indicates that processing is complete. 316 * 317 * Nesting is handled by keeping a stack of states; some transition 318 * functions increase or decrease the depth. They also maintain the 319 * ignore state on a stack. In some complicated cases they have to 320 * alter the preprocessor directive, as follows. 321 * 322 * When we have processed a group that starts off with a known-false 323 * #if/#elif sequence (which has therefore been deleted) followed by a 324 * #elif that we don't understand and therefore must keep, we edit the 325 * latter into a #if to keep the nesting correct. 326 * 327 * When we find a true #elif in a group, the following block will 328 * always be kept and the rest of the sequence after the next #elif or 329 * #else will be discarded. We edit the #elif into a #else and the 330 * following directive to #endif since this has the desired behaviour. 331 * 332 * "Dodgy" directives are split across multiple lines, the most common 333 * example being a multi-line comment hanging off the right of the 334 * directive. We can handle them correctly only if there is no change 335 * from printing to dropping (or vice versa) caused by that directive. 336 * If the directive is the first of a group we have a choice between 337 * failing with an error, or passing it through unchanged instead of 338 * evaluating it. The latter is not the default to avoid questions from 339 * users about unifdef unexpectedly leaving behind preprocessor directives. 
340 */ 341typedef void state_fn(void); 342 343/* report an error */ 344static void Eelif (void) { error("Inappropriate #elif"); } 345static void Eelse (void) { error("Inappropriate #else"); } 346static void Eendif(void) { error("Inappropriate #endif"); } 347static void Eeof (void) { error("Premature EOF"); } 348static void Eioccc(void) { error("Obfuscated preprocessor control line"); } 349/* plain line handling */ 350static void print (void) { flushline(true); } 351static void drop (void) { flushline(false); } 352/* output lacks group's start line */ 353static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); } 354static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); } 355static void Selse (void) { drop(); state(IS_TRUE_ELSE); } 356/* print/pass this block */ 357static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); } 358static void Pelse (void) { print(); state(IS_PASS_ELSE); } 359static void Pendif(void) { print(); --depth; } 360/* discard this block */ 361static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); } 362static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); } 363static void Delse (void) { drop(); state(IS_FALSE_ELSE); } 364static void Dendif(void) { drop(); --depth; } 365/* first line of group */ 366static void Fdrop (void) { nest(); Dfalse(); } 367static void Fpass (void) { nest(); Pelif(); } 368static void Ftrue (void) { nest(); Strue(); } 369static void Ffalse(void) { nest(); Sfalse(); } 370/* variable pedantry for obfuscated lines */ 371static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); } 372static void Oif (void) { if (iocccok) Fpass(); else Eioccc(); } 373static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); } 374/* ignore comments in this block */ 375static void Idrop (void) { Fdrop(); ignoreon(); } 376static void Itrue (void) { Ftrue(); ignoreon(); } 377static void Ifalse(void) { Ffalse(); ignoreon(); } 378/* edit this line */ 
379static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } 380static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); } 381static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); } 382static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); } 383 384static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { 385/* IS_OUTSIDE */ 386{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, 387 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, 388 print, NULL }, 389/* IS_FALSE_PREFIX */ 390{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, 391 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, 392 drop, Eeof }, 393/* IS_TRUE_PREFIX */ 394{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, 395 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 396 print, Eeof }, 397/* IS_PASS_MIDDLE */ 398{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, 399 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, 400 print, Eeof }, 401/* IS_FALSE_MIDDLE */ 402{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, 403 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 404 drop, Eeof }, 405/* IS_TRUE_MIDDLE */ 406{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, 407 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, 408 print, Eeof }, 409/* IS_PASS_ELSE */ 410{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, 411 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, 412 print, Eeof }, 413/* IS_FALSE_ELSE */ 414{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, 415 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, 416 drop, Eeof }, 417/* IS_TRUE_ELSE */ 418{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, 419 
Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, 420 print, Eeof }, 421/* IS_FALSE_TRAILER */ 422{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, 423 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, 424 drop, Eeof } 425/*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF 426 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) 427 PLAIN EOF */ 428}; 429 430/* 431 * State machine utility functions 432 */ 433static void 434ignoreoff(void) 435{ 436 ignoring[depth] = ignoring[depth-1]; 437} 438static void 439ignoreon(void) 440{ 441 ignoring[depth] = true; 442} 443static void 444keywordedit(const char *replacement) 445{ 446 strlcpy(keyword, replacement, tline + sizeof(tline) - keyword); 447 print(); 448} 449static void 450nest(void) 451{ 452 depth += 1; 453 if (depth >= MAXDEPTH) 454 error("Too many levels of nesting"); 455 stifline[depth] = linenum; 456} 457static void 458state(Ifstate is) 459{ 460 ifstate[depth] = is; 461} 462 463/* 464 * Write a line to the output or not, according to command line options. 465 */ 466static void 467flushline(bool keep) 468{ 469 if (symlist) 470 return; 471 if (keep ^ complement) 472 fputs(tline, stdout); 473 else { 474 if (lnblank) 475 putc('\n', stdout); 476 exitstat = 1; 477 } 478} 479 480/* 481 * The driver for the state machine. 482 */ 483static void 484process(void) 485{ 486 Linetype lineval; 487 state_fn *trans; 488 489 for (;;) { 490 linenum++; 491 lineval = getline(); 492 trans = trans_table[ifstate[depth]][lineval]; 493 if (trans == NULL) 494 break; 495 trans(); 496 debug("process %s -> %s depth %d", 497 linetype_name[lineval], 498 ifstate_name[ifstate[depth]], depth); 499 } 500 if (incomment) 501 error("EOF in comment"); 502} 503 504/* 505 * Parse a line and determine its type. We keep the preprocessor line 506 * parser state between calls in the global variable linestate, with 507 * help from skipcomment(). 
508 */ 509static Linetype 510getline(void) 511{ 512 const char *cp; 513 int cursym; 514 int kwlen; 515 Linetype retval; 516 Comment_state wascomment; 517 518 if (fgets(tline, MAXLINE, input) == NULL) 519 return (LT_EOF); 520 retval = LT_PLAIN; 521 wascomment = incomment; 522 cp = skipcomment(tline); 523 if (linestate == LS_START) { 524 if (*cp == '#') { 525 linestate = LS_HASH; 526 cp = skipcomment(cp + 1); 527 } else if (*cp != '\0') 528 linestate = LS_DIRTY; 529 } 530 if (!incomment && linestate == LS_HASH) { 531 keyword = tline + (cp - tline); 532 cp = skipsym(cp); 533 kwlen = cp - keyword; 534 /* no way can we deal with a continuation inside a keyword */ 535 if (strncmp(cp, "\\\n", 2) == 0) 536 Eioccc(); 537 if (strlcmp("ifdef", keyword, kwlen) == 0 || 538 strlcmp("ifndef", keyword, kwlen) == 0) { 539 cp = skipcomment(cp); 540 if ((cursym = findsym(cp)) < 0) 541 retval = LT_IF; 542 else { 543 retval = (keyword[2] == 'n') 544 ? LT_FALSE : LT_TRUE; 545 if (value[cursym] == NULL) 546 retval = (retval == LT_TRUE) 547 ? LT_FALSE : LT_TRUE; 548 if (ignore[cursym]) 549 retval = (retval == LT_TRUE) 550 ? 
LT_TRUEI : LT_FALSEI; 551 } 552 cp = skipsym(cp); 553 } else if (strlcmp("if", keyword, kwlen) == 0) 554 retval = ifeval(&cp); 555 else if (strlcmp("elif", keyword, kwlen) == 0) 556 retval = ifeval(&cp) - LT_IF + LT_ELIF; 557 else if (strlcmp("else", keyword, kwlen) == 0) 558 retval = LT_ELSE; 559 else if (strlcmp("endif", keyword, kwlen) == 0) 560 retval = LT_ENDIF; 561 else { 562 linestate = LS_DIRTY; 563 retval = LT_PLAIN; 564 } 565 cp = skipcomment(cp); 566 if (*cp != '\0') { 567 linestate = LS_DIRTY; 568 if (retval == LT_TRUE || retval == LT_FALSE || 569 retval == LT_TRUEI || retval == LT_FALSEI) 570 retval = LT_IF; 571 if (retval == LT_ELTRUE || retval == LT_ELFALSE) 572 retval = LT_ELIF; 573 } 574 if (retval != LT_PLAIN && (wascomment || incomment)) { 575 retval += LT_DODGY; 576 if (incomment) 577 linestate = LS_DIRTY; 578 } 579 /* skipcomment should have changed the state */ 580 if (linestate == LS_HASH) 581 abort(); /* bug */ 582 } 583 if (linestate == LS_DIRTY) { 584 while (*cp != '\0') 585 cp = skipcomment(cp + 1); 586 } 587 debug("parser %s comment %s line", 588 comment_name[incomment], linestate_name[linestate]); 589 return (retval); 590} 591 592/* 593 * These are the binary operators that are supported by the expression 594 * evaluator. Note that if support for division is added then we also 595 * need short-circuiting booleans because of divide-by-zero. 
596 */ 597static int op_lt(int a, int b) { return (a < b); } 598static int op_gt(int a, int b) { return (a > b); } 599static int op_le(int a, int b) { return (a <= b); } 600static int op_ge(int a, int b) { return (a >= b); } 601static int op_eq(int a, int b) { return (a == b); } 602static int op_ne(int a, int b) { return (a != b); } 603static int op_or(int a, int b) { return (a || b); } 604static int op_and(int a, int b) { return (a && b); } 605 606/* 607 * An evaluation function takes three arguments, as follows: (1) a pointer to 608 * an element of the precedence table which lists the operators at the current 609 * level of precedence; (2) a pointer to an integer which will receive the 610 * value of the expression; and (3) a pointer to a char* that points to the 611 * expression to be evaluated and that is updated to the end of the expression 612 * when evaluation is complete. The function returns LT_FALSE if the value of 613 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the 614 * expression could not be evaluated. 615 */ 616struct ops; 617 618typedef Linetype eval_fn(const struct ops *, int *, const char **); 619 620static eval_fn eval_table, eval_unary; 621 622/* 623 * The precedence table. Expressions involving binary operators are evaluated 624 * in a table-driven way by eval_table. When it evaluates a subexpression it 625 * calls the inner function with its first argument pointing to the next 626 * element of the table. Innermost expressions have special non-table-driven 627 * handling. 
628 */ 629static const struct ops { 630 eval_fn *inner; 631 struct op { 632 const char *str; 633 int (*fn)(int, int); 634 } op[5]; 635} eval_ops[] = { 636 { eval_table, { { "||", op_or } } }, 637 { eval_table, { { "&&", op_and } } }, 638 { eval_table, { { "==", op_eq }, 639 { "!=", op_ne } } }, 640 { eval_unary, { { "<=", op_le }, 641 { ">=", op_ge }, 642 { "<", op_lt }, 643 { ">", op_gt } } } 644}; 645 646/* 647 * Function for evaluating the innermost parts of expressions, 648 * viz. !expr (expr) defined(symbol) symbol number 649 * We reset the keepthis flag when we find a non-constant subexpression. 650 */ 651static Linetype 652eval_unary(const struct ops *ops, int *valp, const char **cpp) 653{ 654 const char *cp; 655 char *ep; 656 int sym; 657 658 cp = skipcomment(*cpp); 659 if (*cp == '!') { 660 debug("eval%d !", ops - eval_ops); 661 cp++; 662 if (eval_unary(ops, valp, &cp) == LT_IF) 663 return (LT_IF); 664 *valp = !*valp; 665 } else if (*cp == '(') { 666 cp++; 667 debug("eval%d (", ops - eval_ops); 668 if (eval_table(eval_ops, valp, &cp) == LT_IF) 669 return (LT_IF); 670 cp = skipcomment(cp); 671 if (*cp++ != ')') 672 return (LT_IF); 673 } else if (isdigit((unsigned char)*cp)) { 674 debug("eval%d number", ops - eval_ops); 675 *valp = strtol(cp, &ep, 0); 676 cp = skipsym(cp); 677 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { 678 cp = skipcomment(cp+7); 679 debug("eval%d defined", ops - eval_ops); 680 if (*cp++ != '(') 681 return (LT_IF); 682 cp = skipcomment(cp); 683 sym = findsym(cp); 684 if (sym < 0 && !symlist) 685 return (LT_IF); 686 *valp = (value[sym] != NULL); 687 cp = skipsym(cp); 688 cp = skipcomment(cp); 689 if (*cp++ != ')') 690 return (LT_IF); 691 keepthis = false; 692 } else if (!endsym(*cp)) { 693 debug("eval%d symbol", ops - eval_ops); 694 sym = findsym(cp); 695 if (sym < 0 && !symlist) 696 return (LT_IF); 697 if (value[sym] == NULL) 698 *valp = 0; 699 else { 700 *valp = strtol(value[sym], &ep, 0); 701 if (*ep != '\0' || ep == 
value[sym]) 702 return (LT_IF); 703 } 704 cp = skipsym(cp); 705 keepthis = false; 706 } else 707 return (LT_IF); 708 709 *cpp = cp; 710 debug("eval%d = %d", ops - eval_ops, *valp); 711 return (*valp ? LT_TRUE : LT_FALSE); 712} 713 714/* 715 * Table-driven evaluation of binary operators. 716 */ 717static Linetype 718eval_table(const struct ops *ops, int *valp, const char **cpp) 719{ 720 const struct op *op; 721 const char *cp; 722 int val; 723 724 debug("eval%d", ops - eval_ops); 725 cp = *cpp; 726 if (ops->inner(ops+1, valp, &cp) == LT_IF) 727 return (LT_IF); 728 for (;;) { 729 cp = skipcomment(cp); 730 for (op = ops->op; op->str != NULL; op++) 731 if (strncmp(cp, op->str, strlen(op->str)) == 0) 732 break; 733 if (op->str == NULL) 734 break; 735 cp += strlen(op->str); 736 debug("eval%d %s", ops - eval_ops, op->str); 737 if (ops->inner(ops+1, &val, &cp) == LT_IF) 738 return (LT_IF); 739 *valp = op->fn(*valp, val); 740 } 741 742 *cpp = cp; 743 debug("eval%d = %d", ops - eval_ops, *valp); 744 return (*valp ? LT_TRUE : LT_FALSE); 745} 746 747/* 748 * Evaluate the expression on a #if or #elif line. If we can work out 749 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we 750 * return just a generic LT_IF. 751 */ 752static Linetype 753ifeval(const char **cpp) 754{ 755 int ret; 756 int val; 757 758 debug("eval %s", *cpp); 759 keepthis = killconsts ? false : true; 760 ret = eval_table(eval_ops, &val, cpp); 761 return (keepthis ? LT_IF : ret); 762} 763 764/* 765 * Skip over comments and stop at the next character position that is 766 * not whitespace. Between calls we keep the comment state in the 767 * global variable incomment, and we also adjust the global variable 768 * linestate when we see a newline. 769 * XXX: doesn't cope with the buffer splitting inside a state transition. 
770 */ 771static const char * 772skipcomment(const char *cp) 773{ 774 if (text || ignoring[depth]) { 775 for (; isspace((unsigned char)*cp); cp++) 776 if (*cp == '\n') 777 linestate = LS_START; 778 return (cp); 779 } 780 while (*cp != '\0') 781 if (strncmp(cp, "\\\n", 2) == 0) 782 cp += 2; 783 else switch (incomment) { 784 case NO_COMMENT: 785 if (strncmp(cp, "/\\\n", 3) == 0) { 786 incomment = STARTING_COMMENT; 787 cp += 3; 788 } else if (strncmp(cp, "/*", 2) == 0) { 789 incomment = C_COMMENT; 790 cp += 2; 791 } else if (strncmp(cp, "//", 2) == 0) { 792 incomment = CXX_COMMENT; 793 cp += 2; 794 } else if (strncmp(cp, "\n", 1) == 0) { 795 linestate = LS_START; 796 cp += 1; 797 } else if (strchr(" \t", *cp) != NULL) { 798 cp += 1; 799 } else 800 return (cp); 801 continue; 802 case CXX_COMMENT: 803 if (strncmp(cp, "\n", 1) == 0) { 804 incomment = NO_COMMENT; 805 linestate = LS_START; 806 } 807 cp += 1; 808 continue; 809 case C_COMMENT: 810 if (strncmp(cp, "*\\\n", 3) == 0) { 811 incomment = FINISHING_COMMENT; 812 cp += 3; 813 } else if (strncmp(cp, "*/", 2) == 0) { 814 incomment = NO_COMMENT; 815 cp += 2; 816 } else 817 cp += 1; 818 continue; 819 case STARTING_COMMENT: 820 if (*cp == '*') { 821 incomment = C_COMMENT; 822 cp += 1; 823 } else if (*cp == '/') { 824 incomment = CXX_COMMENT; 825 cp += 1; 826 } else { 827 incomment = NO_COMMENT; 828 linestate = LS_DIRTY; 829 } 830 continue; 831 case FINISHING_COMMENT: 832 if (*cp == '/') { 833 incomment = NO_COMMENT; 834 cp += 1; 835 } else 836 incomment = C_COMMENT; 837 continue; 838 default: 839 /* bug */ 840 abort(); 841 } 842 return (cp); 843} 844 845/* 846 * Skip over an identifier. 847 */ 848static const char * 849skipsym(const char *cp) 850{ 851 while (!endsym(*cp)) 852 ++cp; 853 return (cp); 854} 855 856/* 857 * Look for the symbol in the symbol table. If is is found, we return 858 * the symbol table index, else we return -1. 
859 */ 860static int 861findsym(const char *str) 862{ 863 const char *cp; 864 int symind; 865 866 cp = skipsym(str); 867 if (cp == str) 868 return (-1); 869 if (symlist) 870 printf("%.*s\n", (int)(cp-str), str); 871 for (symind = 0; symind < nsyms; ++symind) { 872 if (strlcmp(symname[symind], str, cp-str) == 0) { 873 debug("findsym %s %s", symname[symind], 874 value[symind] ? value[symind] : ""); 875 return (symind); 876 } 877 } 878 return (-1); 879} 880 881/* 882 * Add a symbol to the symbol table. 883 */ 884static void 885addsym(bool ignorethis, bool definethis, char *sym) 886{ 887 int symind; 888 char *val; 889 890 symind = findsym(sym); 891 if (symind < 0) { 892 if (nsyms >= MAXSYMS) 893 errx(2, "too many symbols"); 894 symind = nsyms++; 895 } 896 symname[symind] = sym; 897 ignore[symind] = ignorethis; 898 val = sym + (skipsym(sym) - sym); 899 if (definethis) { 900 if (*val == '=') { 901 value[symind] = val+1; 902 *val = '\0'; 903 } else if (*val == '\0') 904 value[symind] = ""; 905 else 906 usage(); 907 } else { 908 if (*val != '\0') 909 usage(); 910 value[symind] = NULL; 911 } 912} 913 914/* 915 * Compare s with n characters of t. 916 * The same as strncmp() except that it checks that s[n] == '\0'. 917 */ 918static int 919strlcmp(const char *s, const char *t, size_t n) 920{ 921 while (n-- && *t != '\0') 922 if (*s != *t) 923 return ((unsigned char)*s - (unsigned char)*t); 924 else 925 ++s, ++t; 926 return ((unsigned char)*s); 927} 928 929/* 930 * Diagnostics. 931 */ 932static void 933debug(const char *msg, ...) 934{ 935 va_list ap; 936 937 if (debugging) { 938 va_start(ap, msg); 939 vwarnx(msg, ap); 940 va_end(ap); 941 } 942} 943 944static void 945error(const char *msg) 946{ 947 if (depth == 0) 948 warnx("%s: %d: %s", filename, linenum, msg); 949 else 950 warnx("%s: %d: %s (#if line %d depth %d)", 951 filename, linenum, msg, stifline[depth], depth); 952 errx(2, "output may be truncated"); 953} 954