unifdef.c revision 114594
1/* 2 * Copyright (c) 1985, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Dave Yost. Support for #if and #elif was added by Tony Finch. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#include <sys/cdefs.h> 38 39#ifndef lint 40#if 0 41static const char copyright[] = 42"@(#) Copyright (c) 1985, 1993\n\ 43 The Regents of the University of California. All rights reserved.\n"; 44#endif 45 46#ifdef __IDSTRING 47__IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93"); 48__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $"); 49__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.148 2003/01/20 12:05:41 fanf2 Exp $"); 50#endif 51#endif /* not lint */ 52__FBSDID("$FreeBSD: head/usr.bin/unifdef/unifdef.c 114594 2003-05-03 19:44:46Z obrien $"); 53 54/* 55 * unifdef - remove ifdef'ed lines 56 * 57 * Wishlist: 58 * provide an option which will append the name of the 59 * appropriate symbol after #else's and #endif's 60 * provide an option which will check symbols after 61 * #else's and #endif's to see that they match their 62 * corresponding #ifdef or #ifndef 63 * generate #line directives in place of deleted code 64 * 65 * The first two items above require better buffer handling, which would 66 * also make it possible to handle all "dodgy" directives correctly. 67 */ 68 69#include <ctype.h> 70#include <err.h> 71#include <stdarg.h> 72#include <stdbool.h> 73#include <stdio.h> 74#include <stdlib.h> 75#include <string.h> 76#include <unistd.h> 77 78/* types of input lines: */ 79typedef enum { 80 LT_TRUEI, /* a true #if with ignore flag */ 81 LT_FALSEI, /* a false #if with ignore flag */ 82 LT_IF, /* an unknown #if */ 83 LT_TRUE, /* a true #if */ 84 LT_FALSE, /* a false #if */ 85 LT_ELIF, /* an unknown #elif */ 86 LT_ELTRUE, /* a true #elif */ 87 LT_ELFALSE, /* a false #elif */ 88 LT_ELSE, /* #else */ 89 LT_ENDIF, /* #endif */ 90 LT_DODGY, /* flag: directive is not on one line */ 91 LT_DODGY_LAST = LT_DODGY + LT_ENDIF, 92 LT_PLAIN, /* ordinary line */ 93 LT_EOF, /* end of file */ 94 LT_COUNT 95} Linetype; 96 97static char const * const linetype_name[] = { 98 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE", 99 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF", 100 "DODGY TRUEI", "DODGY FALSEI", 101 "DODGY IF", "DODGY TRUE", "DODGY FALSE", 102 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE", 103 "DODGY ELSE", "DODGY ENDIF", 104 "PLAIN", "EOF" 105}; 106 107/* state of #if processing */ 108typedef enum { 109 IS_OUTSIDE, 110 IS_FALSE_PREFIX, /* false #if followed by false #elifs */ 111 IS_TRUE_PREFIX, /* first non-false #(el)if is true */ 112 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */ 113 IS_FALSE_MIDDLE, /* a false #elif after a pass state */ 114 IS_TRUE_MIDDLE, /* a true #elif after a pass state */ 115 IS_PASS_ELSE, /* an else after a pass state */ 116 IS_FALSE_ELSE, /* an else after a true state */ 117 IS_TRUE_ELSE, /* an else after only false states */ 118 IS_FALSE_TRAILER, /* #elifs after a true are false */ 119 IS_COUNT 120} Ifstate; 121 122static char const * const ifstate_name[] = { 123 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX", 124 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE", 125 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE", 126 "FALSE_TRAILER" 127}; 128 129/* state of comment parser */ 130typedef enum { 131 NO_COMMENT = false, /* outside a comment */ 132 C_COMMENT, /* in a comment like this one */ 133 CXX_COMMENT, /* between // and end of line */ 134 STARTING_COMMENT, /* just after slash-backslash-newline */ 135 FINISHING_COMMENT /* star-backslash-newline in a C comment */ 136} Comment_state; 137 138static char const * const comment_name[] = { 139 "NO", "C", "CXX", "STARTING", "FINISHING" 140}; 141 142/* state of preprocessor line parser */ 143typedef enum { 144 LS_START, /* only space and comments on this line */ 145 LS_HASH, /* only space, comments, and a hash */ 146 LS_DIRTY /* this line can't be a preprocessor line */ 147} Line_state; 148 149static char const * const linestate_name[] = { 150 "START", "HASH", "DIRTY" 151}; 152 153/* 154 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1 155 */ 156#define MAXDEPTH 64 /* maximum #if nesting */ 157#define MAXLINE 4096 /* maximum length of line */ 158#define MAXSYMS 4096 /* maximum number of symbols */ 159 160/* 161 * Sometimes when editing a keyword the replacement text is longer, so 162 * we leave some space at the end of the tline buffer to accommodate this. 163 */ 164#define EDITSLOP 10 165 166/* 167 * Globals. 168 */ 169 170static bool complement; /* -c: do the complement */ 171static bool debugging; /* -d: debugging reports */ 172static bool iocccok; /* -e: fewer IOCCC errors */ 173static bool killconsts; /* -k: eval constant #ifs */ 174static bool lnblank; /* -l: blank deleted lines */ 175static bool symlist; /* -s: output symbol list */ 176static bool text; /* -t: this is a text file */ 177 178static const char *symname[MAXSYMS]; /* symbol name */ 179static const char *value[MAXSYMS]; /* -Dsym=value */ 180static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ 181static int nsyms; /* number of symbols */ 182 183static FILE *input; /* input file pointer */ 184static const char *filename; /* input file name */ 185static int linenum; /* current line number */ 186 187static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ 188static char *keyword; /* used for editing #elif's */ 189 190static Comment_state incomment; /* comment parser state */ 191static Line_state linestate; /* #if line parser state */ 192static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ 193static bool ignoring[MAXDEPTH]; /* ignore comments state */ 194static int stifline[MAXDEPTH]; /* start of current #if */ 195static int depth; /* current #if nesting */ 196static bool keepthis; /* don't delete constant #if */ 197 198static int exitstat; /* program exit status */ 199 200static void addsym(bool, bool, char *); 201static void debug(const char *, ...); 202static void error(const char *); 203static int findsym(const char *); 204static void flushline(bool); 205static Linetype getline(void); 206static Linetype ifeval(const char **); 207static void ignoreoff(void); 208static void ignoreon(void); 209static void keywordedit(const char *); 210static void nest(void); 211static void process(void); 212static const char *skipcomment(const char *); 213static const char *skipsym(const char *); 214static void state(Ifstate); 215static int strlcmp(const char *, const char *, size_t); 216static void usage(void); 217 218#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_') 219 220/* 221 * The main program. 222 */ 223int 224main(int argc, char *argv[]) 225{ 226 int opt; 227 228 while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1) 229 switch (opt) { 230 case 'i': /* treat stuff controlled by these symbols as text */ 231 /* 232 * For strict backwards-compatibility the U or D 233 * should be immediately after the -i but it doesn't 234 * matter much if we relax that requirement. 235 */ 236 opt = *optarg++; 237 if (opt == 'D') 238 addsym(true, true, optarg); 239 else if (opt == 'U') 240 addsym(true, false, optarg); 241 else 242 usage(); 243 break; 244 case 'D': /* define a symbol */ 245 addsym(false, true, optarg); 246 break; 247 case 'U': /* undef a symbol */ 248 addsym(false, false, optarg); 249 break; 250 case 'I': 251 /* no-op for compatibility with cpp */ 252 break; 253 case 'c': /* treat -D as -U and vice versa */ 254 complement = true; 255 break; 256 case 'd': 257 debugging = true; 258 break; 259 case 'e': /* fewer errors from dodgy lines */ 260 iocccok = true; 261 break; 262 case 'k': /* process constant #ifs */ 263 killconsts = true; 264 break; 265 case 'l': /* blank deleted lines instead of omitting them */ 266 lnblank = true; 267 break; 268 case 's': /* only output list of symbols that control #ifs */ 269 symlist = true; 270 break; 271 case 't': /* don't parse C comments */ 272 text = true; 273 break; 274 default: 275 usage(); 276 } 277 argc -= optind; 278 argv += optind; 279 if (nsyms == 0 && !symlist) { 280 warnx("must -D or -U at least one symbol"); 281 usage(); 282 } 283 if (argc > 1) { 284 errx(2, "can only do one file"); 285 } else if (argc == 1 && strcmp(*argv, "-") != 0) { 286 filename = *argv; 287 if ((input = fopen(filename, "r")) != NULL) { 288 process(); 289 (void) fclose(input); 290 } else 291 err(2, "can't open %s", *argv); 292 } else { 293 filename = "[stdin]"; 294 input = stdin; 295 process(); 296 } 297 298 exit(exitstat); 299} 300 301static void 302usage(void) 303{ 304 fprintf(stderr, "usage: unifdef [-cdeklst]" 305 " [[-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym]] ... [file]\n"); 306 exit(2); 307} 308 309/* 310 * A state transition function alters the global #if processing state 311 * in a particular way. The table below is indexed by the current 312 * processing state and the type of the current line. A NULL entry 313 * indicate that processing is complete. 314 * 315 * Nesting is handled by keeping a stack of states; some transition 316 * functions increase or decrease the depth. They also maintain the 317 * ignore state on a stack. In some complicated cases they have to 318 * alter the preprocessor directive, as follows. 319 * 320 * When we have processed a group that starts off with a known-false 321 * #if/#elif sequence (which has therefore been deleted) followed by a 322 * #elif that we don't understand and therefore must keep, we edit the 323 * latter into a #if to keep the nesting correct. 324 * 325 * When we find a true #elif in a group, the following block will 326 * always be kept and the rest of the sequence after the next #elif or 327 * #else will be discarded. We edit the #elif into a #else and the 328 * following directive to #endif since this has the desired behaviour. 329 * 330 * "Dodgy" directives are split across multiple lines, the most common 331 * example being a multi-line comment hanging off the right of the 332 * directive. We can handle them correctly only if there is no change 333 * from printing to dropping (or vice versa) caused by that directive. 334 * If the directive is the first of a group we have a choice between 335 * failing with an error, or passing it through unchanged instead of 336 * evaluating it. The latter is not the default to avoid questions from 337 * users about unifdef unexpectedly leaving behind preprocessor directives. 338 */ 339typedef void state_fn(void); 340 341/* report an error */ 342static void Eelif (void) { error("Inappropriate #elif"); } 343static void Eelse (void) { error("Inappropriate #else"); } 344static void Eendif(void) { error("Inappropriate #endif"); } 345static void Eeof (void) { error("Premature EOF"); } 346static void Eioccc(void) { error("Obfuscated preprocessor control line"); } 347/* plain line handling */ 348static void print (void) { flushline(true); } 349static void drop (void) { flushline(false); } 350/* output lacks group's start line */ 351static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); } 352static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); } 353static void Selse (void) { drop(); state(IS_TRUE_ELSE); } 354/* print/pass this block */ 355static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); } 356static void Pelse (void) { print(); state(IS_PASS_ELSE); } 357static void Pendif(void) { print(); --depth; } 358/* discard this block */ 359static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); } 360static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); } 361static void Delse (void) { drop(); state(IS_FALSE_ELSE); } 362static void Dendif(void) { drop(); --depth; } 363/* first line of group */ 364static void Fdrop (void) { nest(); Dfalse(); } 365static void Fpass (void) { nest(); Pelif(); } 366static void Ftrue (void) { nest(); Strue(); } 367static void Ffalse(void) { nest(); Sfalse(); } 368/* variable pedantry for obfuscated lines */ 369static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); } 370static void Oif (void) { if (iocccok) Fpass(); else Eioccc(); } 371static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); } 372/* ignore comments in this block */ 373static void Idrop (void) { Fdrop(); ignoreon(); } 374static void Itrue (void) { Ftrue(); ignoreon(); } 375static void Ifalse(void) { Ffalse(); ignoreon(); } 376/* edit this line */ 377static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); } 378static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); } 379static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); } 380static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); } 381 382static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { 383/* IS_OUTSIDE */ 384{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, 385 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, 386 print, NULL }, 387/* IS_FALSE_PREFIX */ 388{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, 389 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, 390 drop, Eeof }, 391/* IS_TRUE_PREFIX */ 392{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, 393 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 394 print, Eeof }, 395/* IS_PASS_MIDDLE */ 396{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, 397 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, 398 print, Eeof }, 399/* IS_FALSE_MIDDLE */ 400{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, 401 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 402 drop, Eeof }, 403/* IS_TRUE_MIDDLE */ 404{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, 405 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, 406 print, Eeof }, 407/* IS_PASS_ELSE */ 408{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, 409 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, 410 print, Eeof }, 411/* IS_FALSE_ELSE */ 412{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, 413 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, 414 drop, Eeof }, 415/* IS_TRUE_ELSE */ 416{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, 417 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, 418 print, Eeof }, 419/* IS_FALSE_TRAILER */ 420{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, 421 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, 422 drop, Eeof } 423/*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF 424 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) 425 PLAIN EOF */ 426}; 427 428/* 429 * State machine utility functions 430 */ 431static void 432ignoreoff(void) 433{ 434 ignoring[depth] = ignoring[depth-1]; 435} 436static void 437ignoreon(void) 438{ 439 ignoring[depth] = true; 440} 441static void 442keywordedit(const char *replacement) 443{ 444 strlcpy(keyword, replacement, tline + sizeof(tline) - keyword); 445 print(); 446} 447static void 448nest(void) 449{ 450 depth += 1; 451 if (depth >= MAXDEPTH) 452 error("Too many levels of nesting"); 453 stifline[depth] = linenum; 454} 455static void 456state(Ifstate is) 457{ 458 ifstate[depth] = is; 459} 460 461/* 462 * Write a line to the output or not, according to command line options. 463 */ 464static void 465flushline(bool keep) 466{ 467 if (symlist) 468 return; 469 if (keep ^ complement) 470 fputs(tline, stdout); 471 else { 472 if (lnblank) 473 putc('\n', stdout); 474 exitstat = 1; 475 } 476} 477 478/* 479 * The driver for the state machine. 480 */ 481static void 482process(void) 483{ 484 Linetype lineval; 485 state_fn *trans; 486 487 for (;;) { 488 linenum++; 489 lineval = getline(); 490 trans = trans_table[ifstate[depth]][lineval]; 491 if (trans == NULL) 492 break; 493 trans(); 494 debug("process %s -> %s depth %d", 495 linetype_name[lineval], 496 ifstate_name[ifstate[depth]], depth); 497 } 498 if (incomment) 499 error("EOF in comment"); 500} 501 502/* 503 * Parse a line and determine its type. We keep the preprocessor line 504 * parser state between calls in a global variable. 505 */ 506static Linetype 507getline(void) 508{ 509 const char *cp; 510 int cursym; 511 int kwlen; 512 Linetype retval; 513 Comment_state wascomment; 514 515 if (fgets(tline, MAXLINE, input) == NULL) 516 return (LT_EOF); 517 retval = LT_PLAIN; 518 wascomment = incomment; 519 cp = skipcomment(tline); 520 if (linestate == LS_START) { 521 if (*cp == '#') { 522 linestate = LS_HASH; 523 cp = skipcomment(cp + 1); 524 } else if (*cp != '\0') 525 linestate = LS_DIRTY; 526 } 527 if (!incomment && linestate == LS_HASH) { 528 keyword = tline + (cp - tline); 529 cp = skipsym(cp); 530 kwlen = cp - keyword; 531 /* no way can we deal with a continuation inside a keyword */ 532 if (strncmp(cp, "\\\n", 2) == 0) 533 Eioccc(); 534 if (strlcmp("ifdef", keyword, kwlen) == 0 || 535 strlcmp("ifndef", keyword, kwlen) == 0) { 536 cp = skipcomment(cp); 537 if ((cursym = findsym(cp)) < 0) 538 retval = LT_IF; 539 else { 540 retval = (keyword[2] == 'n') 541 ? LT_FALSE : LT_TRUE; 542 if (value[cursym] == NULL) 543 retval = (retval == LT_TRUE) 544 ? LT_FALSE : LT_TRUE; 545 if (ignore[cursym]) 546 retval = (retval == LT_TRUE) 547 ? LT_TRUEI : LT_FALSEI; 548 } 549 cp = skipsym(cp); 550 } else if (strlcmp("if", keyword, kwlen) == 0) 551 retval = ifeval(&cp); 552 else if (strlcmp("elif", keyword, kwlen) == 0) 553 retval = ifeval(&cp) - LT_IF + LT_ELIF; 554 else if (strlcmp("else", keyword, kwlen) == 0) 555 retval = LT_ELSE; 556 else if (strlcmp("endif", keyword, kwlen) == 0) 557 retval = LT_ENDIF; 558 else { 559 linestate = LS_DIRTY; 560 retval = LT_PLAIN; 561 } 562 cp = skipcomment(cp); 563 if (*cp != '\0') { 564 linestate = LS_DIRTY; 565 if (retval == LT_TRUE || retval == LT_FALSE || 566 retval == LT_TRUEI || retval == LT_FALSEI) 567 retval = LT_IF; 568 if (retval == LT_ELTRUE || retval == LT_ELFALSE) 569 retval = LT_ELIF; 570 } 571 if (retval != LT_PLAIN && (wascomment || incomment)) { 572 retval += LT_DODGY; 573 if (incomment) 574 linestate = LS_DIRTY; 575 } 576 /* skipcomment should have changed the state */ 577 if (linestate == LS_HASH) 578 abort(); /* bug */ 579 } 580 if (linestate == LS_DIRTY) { 581 while (*cp != '\0') 582 cp = skipcomment(cp + 1); 583 } 584 debug("parser %s comment %s line", 585 comment_name[incomment], linestate_name[linestate]); 586 return (retval); 587} 588 589/* 590 * These are the operators that are supported by the expression evaluator. 591 */ 592static int op_lt(int a, int b) { return (a < b); } 593static int op_gt(int a, int b) { return (a > b); } 594static int op_le(int a, int b) { return (a <= b); } 595static int op_ge(int a, int b) { return (a >= b); } 596static int op_eq(int a, int b) { return (a == b); } 597static int op_ne(int a, int b) { return (a != b); } 598static int op_or(int a, int b) { return (a || b); } 599static int op_and(int a, int b) { return (a && b); } 600 601/* 602 * An evaluation function takes three arguments, as follows: (1) a pointer to 603 * an element of the precedence table which lists the operators at the current 604 * level of precedence; (2) a pointer to an integer which will receive the 605 * value of the expression; and (3) a pointer to a char* that points to the 606 * expression to be evaluated and that is updated to the end of the expression 607 * when evaluation is complete. The function returns LT_FALSE if the value of 608 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the 609 * expression could not be evaluated. 610 */ 611struct ops; 612 613typedef Linetype eval_fn(const struct ops *, int *, const char **); 614 615static eval_fn eval_table, eval_unary; 616 617/* 618 * The precedence table. Expressions involving binary operators are evaluated 619 * in a table-driven way by eval_table. When it evaluates a subexpression it 620 * calls the inner function with its first argument pointing to the next 621 * element of the table. Innermost expressions have special non-table-driven 622 * handling. 623 */ 624static const struct ops { 625 eval_fn *inner; 626 struct op { 627 const char *str; 628 int (*fn)(int, int); 629 } op[5]; 630} eval_ops[] = { 631 { eval_table, { { "||", op_or } } }, 632 { eval_table, { { "&&", op_and } } }, 633 { eval_table, { { "==", op_eq }, 634 { "!=", op_ne } } }, 635 { eval_unary, { { "<=", op_le }, 636 { ">=", op_ge }, 637 { "<", op_lt }, 638 { ">", op_gt } } } 639}; 640 641/* 642 * Function for evaluating the innermost parts of expressions, 643 * viz. !expr (expr) defined(symbol) symbol number 644 * We reset the keepthis flag when we find a non-constant subexpression. 645 */ 646static Linetype 647eval_unary(const struct ops *ops, int *valp, const char **cpp) 648{ 649 const char *cp; 650 char *ep; 651 int sym; 652 653 cp = skipcomment(*cpp); 654 if (*cp == '!') { 655 debug("eval%d !", ops - eval_ops); 656 cp++; 657 if (eval_unary(ops, valp, &cp) == LT_IF) 658 return (LT_IF); 659 *valp = !*valp; 660 } else if (*cp == '(') { 661 cp++; 662 debug("eval%d (", ops - eval_ops); 663 if (eval_table(eval_ops, valp, &cp) == LT_IF) 664 return (LT_IF); 665 cp = skipcomment(cp); 666 if (*cp++ != ')') 667 return (LT_IF); 668 } else if (isdigit((unsigned char)*cp)) { 669 debug("eval%d number", ops - eval_ops); 670 *valp = strtol(cp, &ep, 0); 671 cp = skipsym(cp); 672 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { 673 cp = skipcomment(cp+7); 674 debug("eval%d defined", ops - eval_ops); 675 if (*cp++ != '(') 676 return (LT_IF); 677 cp = skipcomment(cp); 678 sym = findsym(cp); 679 if (sym < 0 && !symlist) 680 return (LT_IF); 681 *valp = (value[sym] != NULL); 682 cp = skipsym(cp); 683 cp = skipcomment(cp); 684 if (*cp++ != ')') 685 return (LT_IF); 686 keepthis = false; 687 } else if (!endsym(*cp)) { 688 debug("eval%d symbol", ops - eval_ops); 689 sym = findsym(cp); 690 if (sym < 0 && !symlist) 691 return (LT_IF); 692 if (value[sym] == NULL) 693 *valp = 0; 694 else { 695 *valp = strtol(value[sym], &ep, 0); 696 if (*ep != '\0' || ep == value[sym]) 697 return (LT_IF); 698 } 699 cp = skipsym(cp); 700 keepthis = false; 701 } else 702 return (LT_IF); 703 704 *cpp = cp; 705 debug("eval%d = %d", ops - eval_ops, *valp); 706 return (*valp ? LT_TRUE : LT_FALSE); 707} 708 709/* 710 * Table-driven evaluation of binary operators. 711 */ 712static Linetype 713eval_table(const struct ops *ops, int *valp, const char **cpp) 714{ 715 const struct op *op; 716 const char *cp; 717 int val; 718 719 debug("eval%d", ops - eval_ops); 720 cp = *cpp; 721 if (ops->inner(ops+1, valp, &cp) == LT_IF) 722 return (LT_IF); 723 for (;;) { 724 cp = skipcomment(cp); 725 for (op = ops->op; op->str != NULL; op++) 726 if (strncmp(cp, op->str, strlen(op->str)) == 0) 727 break; 728 if (op->str == NULL) 729 break; 730 cp += strlen(op->str); 731 debug("eval%d %s", ops - eval_ops, op->str); 732 if (ops->inner(ops+1, &val, &cp) == LT_IF) 733 return (LT_IF); 734 *valp = op->fn(*valp, val); 735 } 736 737 *cpp = cp; 738 debug("eval%d = %d", ops - eval_ops, *valp); 739 return (*valp ? LT_TRUE : LT_FALSE); 740} 741 742/* 743 * Evaluate the expression on a #if or #elif line. If we can work out 744 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we 745 * return just a generic LT_IF. 746 */ 747static Linetype 748ifeval(const char **cpp) 749{ 750 int ret; 751 int val; 752 753 debug("eval %s", *cpp); 754 keepthis = killconsts ? false : true; 755 ret = eval_table(eval_ops, &val, cpp); 756 return (keepthis ? LT_IF : ret); 757} 758 759/* 760 * Skip over comments and stop at the next character position that is 761 * not whitespace. Between calls we keep the comment state in a global 762 * variable, and we also make a note when we get a proper end-of-line. 763 * XXX: doesn't cope with the buffer splitting inside a state transition. 764 */ 765static const char * 766skipcomment(const char *cp) 767{ 768 if (text || ignoring[depth]) { 769 while (isspace((unsigned char)*cp)) 770 cp += 1; 771 return (cp); 772 } 773 while (*cp != '\0') 774 if (strncmp(cp, "\\\n", 2) == 0) 775 cp += 2; 776 else switch (incomment) { 777 case NO_COMMENT: 778 if (strncmp(cp, "/\\\n", 3) == 0) { 779 incomment = STARTING_COMMENT; 780 cp += 3; 781 } else if (strncmp(cp, "/*", 2) == 0) { 782 incomment = C_COMMENT; 783 cp += 2; 784 } else if (strncmp(cp, "//", 2) == 0) { 785 incomment = CXX_COMMENT; 786 cp += 2; 787 } else if (strncmp(cp, "\n", 1) == 0) { 788 linestate = LS_START; 789 cp += 1; 790 } else if (strchr(" \t", *cp) != NULL) { 791 cp += 1; 792 } else 793 return (cp); 794 continue; 795 case CXX_COMMENT: 796 if (strncmp(cp, "\n", 1) == 0) { 797 incomment = NO_COMMENT; 798 linestate = LS_START; 799 } 800 cp += 1; 801 continue; 802 case C_COMMENT: 803 if (strncmp(cp, "*\\\n", 3) == 0) { 804 incomment = FINISHING_COMMENT; 805 cp += 3; 806 } else if (strncmp(cp, "*/", 2) == 0) { 807 incomment = NO_COMMENT; 808 cp += 2; 809 } else 810 cp += 1; 811 continue; 812 case STARTING_COMMENT: 813 if (*cp == '*') { 814 incomment = C_COMMENT; 815 cp += 1; 816 } else if (*cp == '/') { 817 incomment = CXX_COMMENT; 818 cp += 1; 819 } else { 820 incomment = NO_COMMENT; 821 linestate = LS_DIRTY; 822 } 823 continue; 824 case FINISHING_COMMENT: 825 if (*cp == '/') { 826 incomment = NO_COMMENT; 827 cp += 1; 828 } else 829 incomment = C_COMMENT; 830 continue; 831 default: 832 /* bug */ 833 abort(); 834 } 835 return (cp); 836} 837 838/* 839 * Skip over an identifier. 840 */ 841static const char * 842skipsym(const char *cp) 843{ 844 while (!endsym(*cp)) 845 ++cp; 846 return (cp); 847} 848 849/* 850 * Look for the symbol in the symbol table. If is is found, we return 851 * the symbol table index, else we return -1. 852 */ 853static int 854findsym(const char *str) 855{ 856 const char *cp; 857 int symind; 858 859 cp = skipsym(str); 860 if (cp == str) 861 return (-1); 862 if (symlist) 863 printf("%.*s\n", (int)(cp-str), str); 864 for (symind = 0; symind < nsyms; ++symind) { 865 if (strlcmp(symname[symind], str, cp-str) == 0) { 866 debug("findsym %s %s", symname[symind], 867 value[symind] ? value[symind] : ""); 868 return (symind); 869 } 870 } 871 return (-1); 872} 873 874/* 875 * Add a symbol to the symbol table. 876 */ 877static void 878addsym(bool ignorethis, bool definethis, char *sym) 879{ 880 int symind; 881 char *val; 882 883 symind = findsym(sym); 884 if (symind < 0) { 885 if (nsyms >= MAXSYMS) 886 errx(2, "too many symbols"); 887 symind = nsyms++; 888 } 889 symname[symind] = sym; 890 ignore[symind] = ignorethis; 891 val = sym + (skipsym(sym) - sym); 892 if (definethis) { 893 if (*val == '=') { 894 value[symind] = val+1; 895 *val = '\0'; 896 } else if (*val == '\0') 897 value[symind] = ""; 898 else 899 usage(); 900 } else { 901 if (*val != '\0') 902 usage(); 903 value[symind] = NULL; 904 } 905} 906 907/* 908 * Compare s with n characters of t. 909 * The same as strncmp() except that it checks that s[n] == '\0'. 910 */ 911static int 912strlcmp(const char *s, const char *t, size_t n) 913{ 914 while (n-- && *t != '\0') 915 if (*s != *t) 916 return ((unsigned char)*s - (unsigned char)*t); 917 else 918 ++s, ++t; 919 return ((unsigned char)*s); 920} 921 922/* 923 * Diagnostics. 924 */ 925static void 926debug(const char *msg, ...) 927{ 928 va_list ap; 929 930 if (debugging) { 931 va_start(ap, msg); 932 vwarnx(msg, ap); 933 va_end(ap); 934 } 935} 936 937static void 938error(const char *msg) 939{ 940 if (depth == 0) 941 warnx("%s: %d: %s", filename, linenum, msg); 942 else 943 warnx("%s: %d: %s (#if line %d depth %d)", 944 filename, linenum, msg, stifline[depth], depth); 945 errx(2, "output may be truncated"); 946} 947