unifdef.c revision 117076
1/*
2 * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
3 * Copyright (c) 1985, 1993
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38#include <sys/cdefs.h>
39
40#ifndef lint
41#if 0
42static const char copyright[] =
43"@(#) Copyright (c) 1985, 1993\n\
44	The Regents of the University of California.  All rights reserved.\n";
45#endif
46#ifdef __IDSTRING
47__IDSTRING(Berkeley, "@(#)unifdef.c	8.1 (Berkeley) 6/6/93");
48__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $");
49__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.156 2003/06/30 14:30:54 fanf2 Exp $");
50#endif
51#endif /* not lint */
52#ifdef __FBSDID
53__FBSDID("$FreeBSD: head/usr.bin/unifdef/unifdef.c 117076 2003-06-30 14:46:25Z fanf $");
54#endif
55
56/*
57 * unifdef - remove ifdef'ed lines
58 *
59 *  Wishlist:
60 *      provide an option which will append the name of the
61 *        appropriate symbol after #else's and #endif's
62 *      provide an option which will check symbols after
63 *        #else's and #endif's to see that they match their
64 *        corresponding #ifdef or #ifndef
65 *      generate #line directives in place of deleted code
66 *
67 *   The first two items above require better buffer handling, which would
68 *     also make it possible to handle all "dodgy" directives correctly.
69 */
70
71#include <ctype.h>
72#include <err.h>
73#include <stdarg.h>
74#include <stdbool.h>
75#include <stdio.h>
76#include <stdlib.h>
77#include <string.h>
78#include <unistd.h>
79
/*
 * Kinds of input line. The ten directive kinds come first so that
 * LT_DODGY can be added to any of them to name the same directive
 * when it is not contained on a single line.
 */
typedef enum {
	LT_TRUEI,		/* #if known to be true, ignore flag set */
	LT_FALSEI,		/* #if known to be false, ignore flag set */
	LT_IF,			/* #if of unknown value */
	LT_TRUE,		/* #if known to be true */
	LT_FALSE,		/* #if known to be false */
	LT_ELIF,		/* #elif of unknown value */
	LT_ELTRUE,		/* #elif known to be true */
	LT_ELFALSE,		/* #elif known to be false */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* offset: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT
} Linetype;

/* Printable names for debugging output, indexed by Linetype. */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF"
};
108
/*
 * Position within a #if ... #endif group, as tracked by the state
 * machine.
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable names for debugging output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
130
/*
 * State of the comment parser. NO_COMMENT is zero so that the state
 * can be tested as a truth value.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;

/* Printable names for debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING"
};
143
/*
 * State of the preprocessor line parser: what has been seen so far on
 * the current logical line.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
154
/*
 * Fixed capacities, taken from the minimum translation limits in
 * ISO/IEC 9899:1999 5.2.4.1.
 */
#define	MAXDEPTH	64	/* deepest #if nesting handled */
#define	MAXLINE		4096	/* longest input line handled */
#define	MAXSYMS		4096	/* most symbols that can be given */

/*
 * Editing a keyword can make the line longer (the replacement text
 * may exceed the original), so the tline buffer is given this much
 * spare room past MAXLINE.
 */
#define	EDITSLOP	10
167
168/*
169 * Globals.
170 */
171
172static bool             complement;		/* -c: do the complement */
173static bool             debugging;		/* -d: debugging reports */
174static bool             iocccok;		/* -e: fewer IOCCC errors */
175static bool             killconsts;		/* -k: eval constant #ifs */
176static bool             lnblank;		/* -l: blank deleted lines */
177static bool             symlist;		/* -s: output symbol list */
178static bool             text;			/* -t: this is a text file */
179
180static const char      *symname[MAXSYMS];	/* symbol name */
181static const char      *value[MAXSYMS];		/* -Dsym=value */
182static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
183static int              nsyms;			/* number of symbols */
184
185static FILE            *input;			/* input file pointer */
186static const char      *filename;		/* input file name */
187static int              linenum;		/* current line number */
188
189static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
190static char            *keyword;		/* used for editing #elif's */
191
192static Comment_state    incomment;		/* comment parser state */
193static Line_state       linestate;		/* #if line parser state */
194static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
195static bool             ignoring[MAXDEPTH];	/* ignore comments state */
196static int              stifline[MAXDEPTH];	/* start of current #if */
197static int              depth;			/* current #if nesting */
198static bool             keepthis;		/* don't delete constant #if */
199
200static int              exitstat;		/* program exit status */
201
202static void             addsym(bool, bool, char *);
203static void             debug(const char *, ...);
204static void             error(const char *);
205static int              findsym(const char *);
206static void             flushline(bool);
207static Linetype         getline(void);
208static Linetype         ifeval(const char **);
209static void             ignoreoff(void);
210static void             ignoreon(void);
211static void             keywordedit(const char *);
212static void             nest(void);
213static void             process(void);
214static const char      *skipcomment(const char *);
215static const char      *skipsym(const char *);
216static void             state(Ifstate);
217static int              strlcmp(const char *, const char *, size_t);
218static void             usage(void);
219
/*
 * True when c cannot be part of an identifier, i.e. it is not a
 * letter, a digit or an underscore. The argument is parenthesized so
 * that any expression may be passed, but it is evaluated three times
 * and must therefore be free of side effects.
 */
#define endsym(c) (!isalpha((unsigned char)(c)) && \
		   !isdigit((unsigned char)(c)) && (c) != '_')
221
222/*
223 * The main program.
224 */
225int
226main(int argc, char *argv[])
227{
228	int opt;
229
230	while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1)
231		switch (opt) {
232		case 'i': /* treat stuff controlled by these symbols as text */
233			/*
234			 * For strict backwards-compatibility the U or D
235			 * should be immediately after the -i but it doesn't
236			 * matter much if we relax that requirement.
237			 */
238			opt = *optarg++;
239			if (opt == 'D')
240				addsym(true, true, optarg);
241			else if (opt == 'U')
242				addsym(true, false, optarg);
243			else
244				usage();
245			break;
246		case 'D': /* define a symbol */
247			addsym(false, true, optarg);
248			break;
249		case 'U': /* undef a symbol */
250			addsym(false, false, optarg);
251			break;
252		case 'I':
253			/* no-op for compatibility with cpp */
254			break;
255		case 'c': /* treat -D as -U and vice versa */
256			complement = true;
257			break;
258		case 'd':
259			debugging = true;
260			break;
261		case 'e': /* fewer errors from dodgy lines */
262			iocccok = true;
263			break;
264		case 'k': /* process constant #ifs */
265			killconsts = true;
266			break;
267		case 'l': /* blank deleted lines instead of omitting them */
268			lnblank = true;
269			break;
270		case 's': /* only output list of symbols that control #ifs */
271			symlist = true;
272			break;
273		case 't': /* don't parse C comments */
274			text = true;
275			break;
276		default:
277			usage();
278		}
279	argc -= optind;
280	argv += optind;
281	if (nsyms == 0 && !symlist) {
282		warnx("must -D or -U at least one symbol");
283		usage();
284	}
285	if (argc > 1) {
286		errx(2, "can only do one file");
287	} else if (argc == 1 && strcmp(*argv, "-") != 0) {
288		filename = *argv;
289		if ((input = fopen(filename, "r")) != NULL) {
290			process();
291			(void) fclose(input);
292		} else
293			err(2, "can't open %s", *argv);
294	} else {
295		filename = "[stdin]";
296		input = stdin;
297		process();
298	}
299
300	exit(exitstat);
301}
302
/*
 * Print the command synopsis on the standard error output and exit
 * with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-cdeklst]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
310
311/*
312 * A state transition function alters the global #if processing state
313 * in a particular way. The table below is indexed by the current
314 * processing state and the type of the current line. A NULL entry
315 * indicates that processing is complete.
316 *
317 * Nesting is handled by keeping a stack of states; some transition
318 * functions increase or decrease the depth. They also maintain the
319 * ignore state on a stack. In some complicated cases they have to
320 * alter the preprocessor directive, as follows.
321 *
322 * When we have processed a group that starts off with a known-false
323 * #if/#elif sequence (which has therefore been deleted) followed by a
324 * #elif that we don't understand and therefore must keep, we edit the
325 * latter into a #if to keep the nesting correct.
326 *
327 * When we find a true #elif in a group, the following block will
328 * always be kept and the rest of the sequence after the next #elif or
329 * #else will be discarded. We edit the #elif into a #else and the
330 * following directive to #endif since this has the desired behaviour.
331 *
332 * "Dodgy" directives are split across multiple lines, the most common
333 * example being a multi-line comment hanging off the right of the
334 * directive. We can handle them correctly only if there is no change
335 * from printing to dropping (or vice versa) caused by that directive.
336 * If the directive is the first of a group we have a choice between
337 * failing with an error, or passing it through unchanged instead of
338 * evaluating it. The latter is not the default to avoid questions from
339 * users about unifdef unexpectedly leaving behind preprocessor directives.
340 */
341typedef void state_fn(void);
342
343/* report an error */
344static void Eelif (void) { error("Inappropriate #elif"); }
345static void Eelse (void) { error("Inappropriate #else"); }
346static void Eendif(void) { error("Inappropriate #endif"); }
347static void Eeof  (void) { error("Premature EOF"); }
348static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
349/* plain line handling */
350static void print (void) { flushline(true); }
351static void drop  (void) { flushline(false); }
352/* output lacks group's start line */
353static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
354static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
355static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
356/* print/pass this block */
357static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
358static void Pelse (void) { print();              state(IS_PASS_ELSE); }
359static void Pendif(void) { print(); --depth; }
360/* discard this block */
361static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
362static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
363static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
364static void Dendif(void) { drop();  --depth; }
365/* first line of group */
366static void Fdrop (void) { nest();  Dfalse(); }
367static void Fpass (void) { nest();  Pelif(); }
368static void Ftrue (void) { nest();  Strue(); }
369static void Ffalse(void) { nest();  Sfalse(); }
370/* variable pedantry for obfuscated lines */
371static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); }
372static void Oif   (void) { if (iocccok) Fpass(); else Eioccc(); }
373static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); }
374/* ignore comments in this block */
375static void Idrop (void) { Fdrop();  ignoreon(); }
376static void Itrue (void) { Ftrue();  ignoreon(); }
377static void Ifalse(void) { Ffalse(); ignoreon(); }
378/* edit this line */
379static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); }
380static void Mtrue (void) { keywordedit("else\n");  state(IS_TRUE_MIDDLE); }
381static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
382static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
383
384static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
385/* IS_OUTSIDE */
386{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
387  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
388  print, NULL },
389/* IS_FALSE_PREFIX */
390{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
391  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
392  drop,  Eeof },
393/* IS_TRUE_PREFIX */
394{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
395  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
396  print, Eeof },
397/* IS_PASS_MIDDLE */
398{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
399  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
400  print, Eeof },
401/* IS_FALSE_MIDDLE */
402{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
403  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
404  drop,  Eeof },
405/* IS_TRUE_MIDDLE */
406{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
407  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
408  print, Eeof },
409/* IS_PASS_ELSE */
410{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
411  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
412  print, Eeof },
413/* IS_FALSE_ELSE */
414{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
415  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
416  drop,  Eeof },
417/* IS_TRUE_ELSE */
418{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
419  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
420  print, Eeof },
421/* IS_FALSE_TRAILER */
422{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
423  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
424  drop,  Eeof }
425/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
426  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
427  PLAIN  EOF */
428};
429
430/*
431 * State machine utility functions
432 */
433static void
434ignoreoff(void)
435{
436	ignoring[depth] = ignoring[depth-1];
437}
438static void
439ignoreon(void)
440{
441	ignoring[depth] = true;
442}
443static void
444keywordedit(const char *replacement)
445{
446	strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
447	print();
448}
449static void
450nest(void)
451{
452	depth += 1;
453	if (depth >= MAXDEPTH)
454		error("Too many levels of nesting");
455	stifline[depth] = linenum;
456}
457static void
458state(Ifstate is)
459{
460	ifstate[depth] = is;
461}
462
463/*
464 * Write a line to the output or not, according to command line options.
465 */
466static void
467flushline(bool keep)
468{
469	if (symlist)
470		return;
471	if (keep ^ complement)
472		fputs(tline, stdout);
473	else {
474		if (lnblank)
475			putc('\n', stdout);
476		exitstat = 1;
477	}
478}
479
480/*
481 * The driver for the state machine.
482 */
483static void
484process(void)
485{
486	Linetype lineval;
487	state_fn *trans;
488
489	for (;;) {
490		linenum++;
491		lineval = getline();
492		trans = trans_table[ifstate[depth]][lineval];
493		if (trans == NULL)
494			break;
495		trans();
496		debug("process %s -> %s depth %d",
497		    linetype_name[lineval],
498		    ifstate_name[ifstate[depth]], depth);
499	}
500	if (incomment)
501		error("EOF in comment");
502}
503
504/*
505 * Parse a line and determine its type. We keep the preprocessor line
506 * parser state between calls in the global variable linestate, with
507 * help from skipcomment().
508 */
509static Linetype
510getline(void)
511{
512	const char *cp;
513	int cursym;
514	int kwlen;
515	Linetype retval;
516	Comment_state wascomment;
517
518	if (fgets(tline, MAXLINE, input) == NULL)
519		return (LT_EOF);
520	retval = LT_PLAIN;
521	wascomment = incomment;
522	cp = skipcomment(tline);
523	if (linestate == LS_START) {
524		if (*cp == '#') {
525			linestate = LS_HASH;
526			cp = skipcomment(cp + 1);
527		} else if (*cp != '\0')
528			linestate = LS_DIRTY;
529	}
530	if (!incomment && linestate == LS_HASH) {
531		keyword = tline + (cp - tline);
532		cp = skipsym(cp);
533		kwlen = cp - keyword;
534		/* no way can we deal with a continuation inside a keyword */
535		if (strncmp(cp, "\\\n", 2) == 0)
536			Eioccc();
537		if (strlcmp("ifdef", keyword, kwlen) == 0 ||
538		    strlcmp("ifndef", keyword, kwlen) == 0) {
539			cp = skipcomment(cp);
540			if ((cursym = findsym(cp)) < 0)
541				retval = LT_IF;
542			else {
543				retval = (keyword[2] == 'n')
544				    ? LT_FALSE : LT_TRUE;
545				if (value[cursym] == NULL)
546					retval = (retval == LT_TRUE)
547					    ? LT_FALSE : LT_TRUE;
548				if (ignore[cursym])
549					retval = (retval == LT_TRUE)
550					    ? LT_TRUEI : LT_FALSEI;
551			}
552			cp = skipsym(cp);
553		} else if (strlcmp("if", keyword, kwlen) == 0)
554			retval = ifeval(&cp);
555		else if (strlcmp("elif", keyword, kwlen) == 0)
556			retval = ifeval(&cp) - LT_IF + LT_ELIF;
557		else if (strlcmp("else", keyword, kwlen) == 0)
558			retval = LT_ELSE;
559		else if (strlcmp("endif", keyword, kwlen) == 0)
560			retval = LT_ENDIF;
561		else {
562			linestate = LS_DIRTY;
563			retval = LT_PLAIN;
564		}
565		cp = skipcomment(cp);
566		if (*cp != '\0') {
567			linestate = LS_DIRTY;
568			if (retval == LT_TRUE || retval == LT_FALSE ||
569			    retval == LT_TRUEI || retval == LT_FALSEI)
570				retval = LT_IF;
571			if (retval == LT_ELTRUE || retval == LT_ELFALSE)
572				retval = LT_ELIF;
573		}
574		if (retval != LT_PLAIN && (wascomment || incomment)) {
575			retval += LT_DODGY;
576			if (incomment)
577				linestate = LS_DIRTY;
578		}
579		/* skipcomment should have changed the state */
580		if (linestate == LS_HASH)
581			abort(); /* bug */
582	}
583	if (linestate == LS_DIRTY) {
584		while (*cp != '\0')
585			cp = skipcomment(cp + 1);
586	}
587	debug("parser %s comment %s line",
588	    comment_name[incomment], linestate_name[linestate]);
589	return (retval);
590}
591
/*
 * The binary operators understood by the expression evaluator. Each
 * returns 1 when the relation holds and 0 when it does not. If
 * division is ever added, the boolean operators will have to
 * short-circuit as well, because of divide-by-zero.
 */
static int
op_lt(int a, int b)
{
	return (a < b);
}
static int
op_gt(int a, int b)
{
	return (a > b);
}
static int
op_le(int a, int b)
{
	return (a <= b);
}
static int
op_ge(int a, int b)
{
	return (a >= b);
}
static int
op_eq(int a, int b)
{
	return (a == b);
}
static int
op_ne(int a, int b)
{
	return (a != b);
}
static int
op_or(int a, int b)
{
	return (a || b);
}
static int
op_and(int a, int b)
{
	return (a && b);
}
605
606/*
607 * An evaluation function takes three arguments, as follows: (1) a pointer to
608 * an element of the precedence table which lists the operators at the current
609 * level of precedence; (2) a pointer to an integer which will receive the
610 * value of the expression; and (3) a pointer to a char* that points to the
611 * expression to be evaluated and that is updated to the end of the expression
612 * when evaluation is complete. The function returns LT_FALSE if the value of
613 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
614 * expression could not be evaluated.
615 */
616struct ops;
617
618typedef Linetype eval_fn(const struct ops *, int *, const char **);
619
620static eval_fn eval_table, eval_unary;
621
622/*
623 * The precedence table. Expressions involving binary operators are evaluated
624 * in a table-driven way by eval_table. When it evaluates a subexpression it
625 * calls the inner function with its first argument pointing to the next
626 * element of the table. Innermost expressions have special non-table-driven
627 * handling.
628 */
629static const struct ops {
630	eval_fn *inner;
631	struct op {
632		const char *str;
633		int (*fn)(int, int);
634	} op[5];
635} eval_ops[] = {
636	{ eval_table, { { "||", op_or } } },
637	{ eval_table, { { "&&", op_and } } },
638	{ eval_table, { { "==", op_eq },
639			{ "!=", op_ne } } },
640	{ eval_unary, { { "<=", op_le },
641			{ ">=", op_ge },
642			{ "<", op_lt },
643			{ ">", op_gt } } }
644};
645
646/*
647 * Function for evaluating the innermost parts of expressions,
648 * viz. !expr (expr) defined(symbol) symbol number
649 * We reset the keepthis flag when we find a non-constant subexpression.
650 */
651static Linetype
652eval_unary(const struct ops *ops, int *valp, const char **cpp)
653{
654	const char *cp;
655	char *ep;
656	int sym;
657
658	cp = skipcomment(*cpp);
659	if (*cp == '!') {
660		debug("eval%d !", ops - eval_ops);
661		cp++;
662		if (eval_unary(ops, valp, &cp) == LT_IF)
663			return (LT_IF);
664		*valp = !*valp;
665	} else if (*cp == '(') {
666		cp++;
667		debug("eval%d (", ops - eval_ops);
668		if (eval_table(eval_ops, valp, &cp) == LT_IF)
669			return (LT_IF);
670		cp = skipcomment(cp);
671		if (*cp++ != ')')
672			return (LT_IF);
673	} else if (isdigit((unsigned char)*cp)) {
674		debug("eval%d number", ops - eval_ops);
675		*valp = strtol(cp, &ep, 0);
676		cp = skipsym(cp);
677	} else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
678		cp = skipcomment(cp+7);
679		debug("eval%d defined", ops - eval_ops);
680		if (*cp++ != '(')
681			return (LT_IF);
682		cp = skipcomment(cp);
683		sym = findsym(cp);
684		if (sym < 0 && !symlist)
685			return (LT_IF);
686		*valp = (value[sym] != NULL);
687		cp = skipsym(cp);
688		cp = skipcomment(cp);
689		if (*cp++ != ')')
690			return (LT_IF);
691		keepthis = false;
692	} else if (!endsym(*cp)) {
693		debug("eval%d symbol", ops - eval_ops);
694		sym = findsym(cp);
695		if (sym < 0 && !symlist)
696			return (LT_IF);
697		if (value[sym] == NULL)
698			*valp = 0;
699		else {
700			*valp = strtol(value[sym], &ep, 0);
701			if (*ep != '\0' || ep == value[sym])
702				return (LT_IF);
703		}
704		cp = skipsym(cp);
705		keepthis = false;
706	} else
707		return (LT_IF);
708
709	*cpp = cp;
710	debug("eval%d = %d", ops - eval_ops, *valp);
711	return (*valp ? LT_TRUE : LT_FALSE);
712}
713
714/*
715 * Table-driven evaluation of binary operators.
716 */
717static Linetype
718eval_table(const struct ops *ops, int *valp, const char **cpp)
719{
720	const struct op *op;
721	const char *cp;
722	int val;
723
724	debug("eval%d", ops - eval_ops);
725	cp = *cpp;
726	if (ops->inner(ops+1, valp, &cp) == LT_IF)
727		return (LT_IF);
728	for (;;) {
729		cp = skipcomment(cp);
730		for (op = ops->op; op->str != NULL; op++)
731			if (strncmp(cp, op->str, strlen(op->str)) == 0)
732				break;
733		if (op->str == NULL)
734			break;
735		cp += strlen(op->str);
736		debug("eval%d %s", ops - eval_ops, op->str);
737		if (ops->inner(ops+1, &val, &cp) == LT_IF)
738			return (LT_IF);
739		*valp = op->fn(*valp, val);
740	}
741
742	*cpp = cp;
743	debug("eval%d = %d", ops - eval_ops, *valp);
744	return (*valp ? LT_TRUE : LT_FALSE);
745}
746
747/*
748 * Evaluate the expression on a #if or #elif line. If we can work out
749 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
750 * return just a generic LT_IF.
751 */
752static Linetype
753ifeval(const char **cpp)
754{
755	int ret;
756	int val;
757
758	debug("eval %s", *cpp);
759	keepthis = killconsts ? false : true;
760	ret = eval_table(eval_ops, &val, cpp);
761	return (keepthis ? LT_IF : ret);
762}
763
764/*
765 * Skip over comments and stop at the next character position that is
766 * not whitespace. Between calls we keep the comment state in the
767 * global variable incomment, and we also adjust the global variable
768 * linestate when we see a newline.
769 * XXX: doesn't cope with the buffer splitting inside a state transition.
770 */
771static const char *
772skipcomment(const char *cp)
773{
774	if (text || ignoring[depth]) {
775		for (; isspace((unsigned char)*cp); cp++)
776			if (*cp == '\n')
777				linestate = LS_START;
778		return (cp);
779	}
780	while (*cp != '\0')
781		if (strncmp(cp, "\\\n", 2) == 0)
782			cp += 2;
783		else switch (incomment) {
784		case NO_COMMENT:
785			if (strncmp(cp, "/\\\n", 3) == 0) {
786				incomment = STARTING_COMMENT;
787				cp += 3;
788			} else if (strncmp(cp, "/*", 2) == 0) {
789				incomment = C_COMMENT;
790				cp += 2;
791			} else if (strncmp(cp, "//", 2) == 0) {
792				incomment = CXX_COMMENT;
793				cp += 2;
794			} else if (strncmp(cp, "\n", 1) == 0) {
795				linestate = LS_START;
796				cp += 1;
797			} else if (strchr(" \t", *cp) != NULL) {
798				cp += 1;
799			} else
800				return (cp);
801			continue;
802		case CXX_COMMENT:
803			if (strncmp(cp, "\n", 1) == 0) {
804				incomment = NO_COMMENT;
805				linestate = LS_START;
806			}
807			cp += 1;
808			continue;
809		case C_COMMENT:
810			if (strncmp(cp, "*\\\n", 3) == 0) {
811				incomment = FINISHING_COMMENT;
812				cp += 3;
813			} else if (strncmp(cp, "*/", 2) == 0) {
814				incomment = NO_COMMENT;
815				cp += 2;
816			} else
817				cp += 1;
818			continue;
819		case STARTING_COMMENT:
820			if (*cp == '*') {
821				incomment = C_COMMENT;
822				cp += 1;
823			} else if (*cp == '/') {
824				incomment = CXX_COMMENT;
825				cp += 1;
826			} else {
827				incomment = NO_COMMENT;
828				linestate = LS_DIRTY;
829			}
830			continue;
831		case FINISHING_COMMENT:
832			if (*cp == '/') {
833				incomment = NO_COMMENT;
834				cp += 1;
835			} else
836				incomment = C_COMMENT;
837			continue;
838		default:
839			/* bug */
840			abort();
841		}
842	return (cp);
843}
844
/*
 * Skip over an identifier: return the position of the first character
 * at or after cp that cannot be part of one.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
855
856/*
857 * Look for the symbol in the symbol table. If is is found, we return
858 * the symbol table index, else we return -1.
859 */
860static int
861findsym(const char *str)
862{
863	const char *cp;
864	int symind;
865
866	cp = skipsym(str);
867	if (cp == str)
868		return (-1);
869	if (symlist)
870		printf("%.*s\n", (int)(cp-str), str);
871	for (symind = 0; symind < nsyms; ++symind) {
872		if (strlcmp(symname[symind], str, cp-str) == 0) {
873			debug("findsym %s %s", symname[symind],
874			    value[symind] ? value[symind] : "");
875			return (symind);
876		}
877	}
878	return (-1);
879}
880
881/*
882 * Add a symbol to the symbol table.
883 */
884static void
885addsym(bool ignorethis, bool definethis, char *sym)
886{
887	int symind;
888	char *val;
889
890	symind = findsym(sym);
891	if (symind < 0) {
892		if (nsyms >= MAXSYMS)
893			errx(2, "too many symbols");
894		symind = nsyms++;
895	}
896	symname[symind] = sym;
897	ignore[symind] = ignorethis;
898	val = sym + (skipsym(sym) - sym);
899	if (definethis) {
900		if (*val == '=') {
901			value[symind] = val+1;
902			*val = '\0';
903		} else if (*val == '\0')
904			value[symind] = "";
905		else
906			usage();
907	} else {
908		if (*val != '\0')
909			usage();
910		value[symind] = NULL;
911	}
912}
913
/*
 * Compare the string s with the first n characters of t.
 * This is like strncmp() except that s must also end there: the
 * result is zero only if s[n] == '\0' as well.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	for (; n > 0 && *t != '\0'; n--, s++, t++) {
		if (*s != *t)
			return ((unsigned char)*s - (unsigned char)*t);
	}
	/* non-zero when s has characters left over */
	return ((unsigned char)*s);
}
928
929/*
930 * Diagnostics.
931 */
932static void
933debug(const char *msg, ...)
934{
935	va_list ap;
936
937	if (debugging) {
938		va_start(ap, msg);
939		vwarnx(msg, ap);
940		va_end(ap);
941	}
942}
943
944static void
945error(const char *msg)
946{
947	if (depth == 0)
948		warnx("%s: %d: %s", filename, linenum, msg);
949	else
950		warnx("%s: %d: %s (#if line %d depth %d)",
951		    filename, linenum, msg, stifline[depth], depth);
952	errx(2, "output may be truncated");
953}
954