1/*	$NetBSD: unifdef.c,v 1.21 2012/02/29 23:35:10 joerg Exp $	*/
2
3/*
4 * Copyright (c) 1985, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35/*
36 * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
37 *
38 * This code is derived from software contributed to Berkeley by
39 * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 *    must display the following acknowledgement:
51 *	This product includes software developed by the University of
52 *	California, Berkeley and its contributors.
53 * 4. Neither the name of the University nor the names of its contributors
54 *    may be used to endorse or promote products derived from this software
55 *    without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 * SUCH DAMAGE.
68 */
69
70#include <sys/cdefs.h>
71
72#ifndef lint
73#if 0
74static const char copyright[] =
75"@(#) Copyright (c) 1985, 1993\n\
76	The Regents of the University of California.  All rights reserved.\n";
77#endif
78#ifdef __IDSTRING
79__IDSTRING(Berkeley, "@(#)unifdef.c	8.1 (Berkeley) 6/6/93");
80__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.21 2012/02/29 23:35:10 joerg Exp $");
81__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.161 2003/07/01 15:32:48 fanf2 Exp $");
82#endif
83#endif /* not lint */
84#ifdef __FBSDID
85__FBSDID("$FreeBSD: src/usr.bin/unifdef/unifdef.c,v 1.18 2003/07/01 15:30:43 fanf Exp $");
86#endif
87
88/*
89 * unifdef - remove ifdef'ed lines
90 *
91 *  Wishlist:
92 *      provide an option which will append the name of the
93 *        appropriate symbol after #else's and #endif's
94 *      provide an option which will check symbols after
95 *        #else's and #endif's to see that they match their
96 *        corresponding #ifdef or #ifndef
97 *      generate #line directives in place of deleted code
98 *
99 *   The first two items above require better buffer handling, which would
100 *     also make it possible to handle all "dodgy" directives correctly.
101 */
102
103#include <ctype.h>
104#include <err.h>
105#include <libgen.h>
106#include <stdarg.h>
107#include <stdio.h>
108#include <stdlib.h>
109#include <string.h>
110#include <unistd.h>
111
112#include <sys/param.h>
113#include <sys/stat.h>
114
115#include "stdbool.h"
116
/*
 * Classification of an input line, as returned by get_line().
 *
 * The first ten values are the directive kinds.  Adding LT_DODGY to one
 * of them gives the "dodgy" form of the same directive, and get_line()
 * maps an #if result onto the matching #elif result by arithmetic
 * (value - LT_IF + LT_ELIF), so the relative order of the enumerators
 * must be preserved.
 */
typedef enum {
	LT_TRUEI,		/* true #if, symbol carries the ignore flag */
	LT_FALSEI,		/* false #if, symbol carries the ignore flag */
	LT_IF,			/* #if whose value is unknown */
	LT_TRUE,		/* true #if */
	LT_FALSE,		/* false #if */
	LT_ELIF,		/* #elif whose value is unknown */
	LT_ELTRUE,		/* true #elif */
	LT_ELFALSE,		/* false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* offset: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* not a directive we care about */
	LT_EOF,			/* no more input */
	LT_COUNT
} Linetype;

/* Printable names for debugging output, indexed by Linetype. */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF"
};
145
/*
 * Where we are inside the current #if ... #endif group.  One value is
 * kept per nesting level; it selects the row of the transition table.
 */
typedef enum {
	IS_OUTSIDE,		/* not inside any group at this level */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;

/* Printable names for debugging output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
167
/*
 * Comment parser state, carried from one input line to the next.
 * NO_COMMENT is zero so the state can be tested as a truth value.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;

/* Printable names for debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING"
};
180
/*
 * How much of a possible preprocessor directive has been seen on the
 * current logical line.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
191
192/*
193 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
194 */
195#define	MAXDEPTH        64			/* maximum #if nesting */
196#define	MAXLINE         4096			/* maximum length of line */
197#define	MAXSYMS         4096			/* maximum number of symbols */
198
199/*
200 * Sometimes when editing a keyword the replacement text is longer, so
201 * we leave some space at the end of the tline buffer to accommodate this.
202 */
203#define	EDITSLOP        10
204
205/*
206 * Globals.
207 */
208
209static bool             complement;		/* -c: do the complement */
210static bool             debugging;		/* -d: debugging reports */
211static bool             iocccok;		/* -e: fewer IOCCC errors */
212static bool             killconsts;		/* -k: eval constant #ifs */
213static bool             lnblank;		/* -l: blank deleted lines */
214static bool             symlist;		/* -s: output symbol list */
215static bool             text;			/* -t: this is a text file */
216
217static const char      *symname[MAXSYMS];	/* symbol name */
218static const char      *value[MAXSYMS];		/* -Dsym=value */
219static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
220static int              nsyms;			/* number of symbols */
221
222static FILE            *input;			/* input file pointer */
223static FILE            *output;			/* output file pointer */
224static const char      *filename;		/* input file name */
225static char            *ofilename;		/* output file name */
226static char             tmpname[MAXPATHLEN];	/* used when overwriting */
227static int              linenum;		/* current line number */
228static int              overwriting;		/* output overwrites input */
229
230static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
231static char            *keyword;		/* used for editing #elif's */
232
233static Comment_state    incomment;		/* comment parser state */
234static Line_state       linestate;		/* #if line parser state */
235static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
236static bool             ignoring[MAXDEPTH];	/* ignore comments state */
237static int              stifline[MAXDEPTH];	/* start of current #if */
238static int              depth;			/* current #if nesting */
239static bool             keepthis;		/* don't delete constant #if */
240
241static int              exitstat;		/* program exit status */
242
243static void             addsym(bool, bool, char *);
244static void             debug(const char *, ...) __printflike(1, 2);
245__dead static void      done(void);
246__dead static void      error(const char *);
247static int              findsym(const char *);
248static void             flushline(bool);
249static Linetype         get_line(void);
250static Linetype         ifeval(const char **);
251static void             ignoreoff(void);
252static void             ignoreon(void);
253static void             keywordedit(const char *);
254static void             nest(void);
255__dead static void      process(void);
256static const char      *skipcomment(const char *);
257static const char      *skipsym(const char *);
258static void             state(Ifstate);
259static int              strlcmp(const char *, const char *, size_t);
260__dead static void      usage(void);
261
/*
 * True if c cannot be part of an identifier, i.e. it is neither a
 * letter, a digit nor an underscore (so it is also true for '\0').
 * The argument is parenthesized so that an expression can be passed
 * safely; it is still evaluated twice and must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
263
264/*
265 * The main program.
266 */
267int
268main(int argc, char *argv[])
269{
270	int opt;
271	struct stat isb, osb;
272
273	while ((opt = getopt(argc, argv, "i:D:U:I:o:cdeklst")) != -1)
274		switch (opt) {
275		case 'i': /* treat stuff controlled by these symbols as text */
276			/*
277			 * For strict backwards-compatibility the U or D
278			 * should be immediately after the -i but it doesn't
279			 * matter much if we relax that requirement.
280			 */
281			opt = *optarg++;
282			if (opt == 'D')
283				addsym(true, true, optarg);
284			else if (opt == 'U')
285				addsym(true, false, optarg);
286			else
287				usage();
288			break;
289		case 'D': /* define a symbol */
290			addsym(false, true, optarg);
291			break;
292		case 'U': /* undef a symbol */
293			addsym(false, false, optarg);
294			break;
295		case 'I':
296			/* no-op for compatibility with cpp */
297			break;
298		case 'c': /* treat -D as -U and vice versa */
299			complement = true;
300			break;
301		case 'd':
302			debugging = true;
303			break;
304		case 'e': /* fewer errors from dodgy lines */
305			iocccok = true;
306			break;
307		case 'k': /* process constant #ifs */
308			killconsts = true;
309			break;
310		case 'l': /* blank deleted lines instead of omitting them */
311			lnblank = true;
312			break;
313		case 'o': /* output to a file */
314			ofilename = optarg;
315			break;
316		case 's': /* only output list of symbols that control #ifs */
317			symlist = true;
318			break;
319		case 't': /* don't parse C comments */
320			text = true;
321			break;
322		default:
323			usage();
324		}
325	argc -= optind;
326	argv += optind;
327	if (nsyms == 0 && !symlist) {
328		warnx("must -D or -U at least one symbol");
329		usage();
330	}
331	if (argc > 1) {
332		errx(2, "can only do one file");
333	} else if (argc == 1 && strcmp(*argv, "-") != 0) {
334		filename = *argv;
335		input = fopen(filename, "r");
336		if (input == NULL)
337			err(2, "can't open %s", filename);
338	} else {
339		filename = "[stdin]";
340		input = stdin;
341	}
342	if (ofilename == NULL) {
343		output = stdout;
344	} else {
345		if (stat(ofilename, &osb) == 0) {
346			if (fstat(fileno(input), &isb) != 0)
347				err(2, "can't fstat %s", filename);
348
349			overwriting = (osb.st_dev == isb.st_dev &&
350			    osb.st_ino == isb.st_ino);
351		}
352		if (overwriting) {
353			int ofd;
354
355			snprintf(tmpname, sizeof(tmpname), "%s/unifdef.XXXXXX",
356				 dirname(ofilename));
357			if ((ofd = mkstemp(tmpname)) != -1)
358				output = fdopen(ofd, "w+");
359			if (output == NULL)
360				err(2, "can't create temporary file");
361			fchmod(ofd, isb.st_mode & ACCESSPERMS);
362		} else {
363			output = fopen(ofilename, "w");
364			if (output == NULL)
365				err(2, "can't open %s", ofilename);
366		}
367	}
368	process();
369	abort(); /* bug */
370}
371
/*
 * Print the command synopsis on stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-cdeklst] [-o output]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
379
380/*
381 * A state transition function alters the global #if processing state
382 * in a particular way. The table below is indexed by the current
383 * processing state and the type of the current line.
384 *
385 * Nesting is handled by keeping a stack of states; some transition
386 * functions increase or decrease the depth. They also maintain the
387 * ignore state on a stack. In some complicated cases they have to
388 * alter the preprocessor directive, as follows.
389 *
390 * When we have processed a group that starts off with a known-false
391 * #if/#elif sequence (which has therefore been deleted) followed by a
392 * #elif that we don't understand and therefore must keep, we edit the
393 * latter into a #if to keep the nesting correct.
394 *
395 * When we find a true #elif in a group, the following block will
396 * always be kept and the rest of the sequence after the next #elif or
397 * #else will be discarded. We edit the #elif into a #else and the
398 * following directive to #endif since this has the desired behaviour.
399 *
400 * "Dodgy" directives are split across multiple lines, the most common
401 * example being a multi-line comment hanging off the right of the
402 * directive. We can handle them correctly only if there is no change
403 * from printing to dropping (or vice versa) caused by that directive.
404 * If the directive is the first of a group we have a choice between
405 * failing with an error, or passing it through unchanged instead of
406 * evaluating it. The latter is not the default to avoid questions from
407 * users about unifdef unexpectedly leaving behind preprocessor directives.
408 */
409typedef void state_fn(void);
410
411/* report an error */
412__dead static void Eelif (void) { error("Inappropriate #elif"); }
413__dead static void Eelse (void) { error("Inappropriate #else"); }
414__dead static void Eendif(void) { error("Inappropriate #endif"); }
415__dead static void Eeof  (void) { error("Premature EOF"); }
416__dead static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
417/* plain line handling */
418static void print (void) { flushline(true); }
419static void drop  (void) { flushline(false); }
420/* output lacks group's start line */
421static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
422static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
423static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
424/* print/pass this block */
425static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
426static void Pelse (void) { print();              state(IS_PASS_ELSE); }
427static void Pendif(void) { print(); --depth; }
428/* discard this block */
429static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
430static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
431static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
432static void Dendif(void) { drop();  --depth; }
433/* first line of group */
434static void Fdrop (void) { nest();  Dfalse(); }
435static void Fpass (void) { nest();  Pelif(); }
436static void Ftrue (void) { nest();  Strue(); }
437static void Ffalse(void) { nest();  Sfalse(); }
438/* variable pedantry for obfuscated lines */
439static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); }
440static void Oif   (void) { if (iocccok) Fpass(); else Eioccc(); }
441static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); }
442/* ignore comments in this block */
443static void Idrop (void) { Fdrop();  ignoreon(); }
444static void Itrue (void) { Ftrue();  ignoreon(); }
445static void Ifalse(void) { Ffalse(); ignoreon(); }
446/* edit this line */
447static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); }
448static void Mtrue (void) { keywordedit("else\n");  state(IS_TRUE_MIDDLE); }
449static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
450static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
451
452static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
453/* IS_OUTSIDE */
454{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
455  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
456  print, done },
457/* IS_FALSE_PREFIX */
458{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
459  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
460  drop,  Eeof },
461/* IS_TRUE_PREFIX */
462{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
463  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
464  print, Eeof },
465/* IS_PASS_MIDDLE */
466{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
467  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
468  print, Eeof },
469/* IS_FALSE_MIDDLE */
470{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
471  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
472  drop,  Eeof },
473/* IS_TRUE_MIDDLE */
474{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
475  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
476  print, Eeof },
477/* IS_PASS_ELSE */
478{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
479  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
480  print, Eeof },
481/* IS_FALSE_ELSE */
482{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
483  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
484  drop,  Eeof },
485/* IS_TRUE_ELSE */
486{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
487  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
488  print, Eeof },
489/* IS_FALSE_TRAILER */
490{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
491  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
492  drop,  Eeof }
493/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
494  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
495  PLAIN  EOF */
496};
497
498/*
499 * State machine utility functions
500 */
501static void
502done(void)
503{
504	if (incomment)
505		error("EOF in comment");
506	if (fclose(output)) {
507		if (overwriting) {
508			unlink(tmpname);
509			errx(2, "%s unchanged", ofilename);
510		}
511	}
512	if (overwriting && rename(tmpname, ofilename)) {
513		unlink(tmpname);
514		errx(2, "%s unchanged", ofilename);
515	}
516	exit(exitstat);
517}
518static void
519ignoreoff(void)
520{
521	ignoring[depth] = ignoring[depth-1];
522}
523static void
524ignoreon(void)
525{
526	ignoring[depth] = true;
527}
528static void
529keywordedit(const char *replacement)
530{
531	strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
532	print();
533}
534static void
535nest(void)
536{
537	depth += 1;
538	if (depth >= MAXDEPTH)
539		error("Too many levels of nesting");
540	stifline[depth] = linenum;
541}
542static void
543state(Ifstate is)
544{
545	ifstate[depth] = is;
546}
547
548/*
549 * Write a line to the output or not, according to command line options.
550 */
551static void
552flushline(bool keep)
553{
554	if (symlist)
555		return;
556	if (keep ^ complement)
557		fputs(tline, output);
558	else {
559		if (lnblank)
560			putc('\n', output);
561		exitstat = 1;
562	}
563}
564
565/*
566 * The driver for the state machine.
567 */
568static void
569process(void)
570{
571	Linetype lineval;
572
573	for (;;) {
574		linenum++;
575		lineval = get_line();
576		trans_table[ifstate[depth]][lineval]();
577		debug("process %s -> %s depth %d",
578		    linetype_name[lineval],
579		    ifstate_name[ifstate[depth]], depth);
580	}
581}
582
583/*
584 * Parse a line and determine its type. We keep the preprocessor line
585 * parser state between calls in the global variable linestate, with
586 * help from skipcomment().
587 */
588static Linetype
589get_line(void)
590{
591	const char *cp;
592	int cursym;
593	int kwlen;
594	Linetype retval;
595	Comment_state wascomment;
596
597	if (fgets(tline, MAXLINE, input) == NULL)
598		return (LT_EOF);
599	retval = LT_PLAIN;
600	wascomment = incomment;
601	cp = skipcomment(tline);
602	if (linestate == LS_START) {
603		if (*cp == '#') {
604			linestate = LS_HASH;
605			cp = skipcomment(cp + 1);
606		} else if (*cp != '\0')
607			linestate = LS_DIRTY;
608	}
609	if (!incomment && linestate == LS_HASH) {
610		keyword = tline + (cp - tline);
611		cp = skipsym(cp);
612		kwlen = cp - keyword;
613		/* no way can we deal with a continuation inside a keyword */
614		if (strncmp(cp, "\\\n", 2) == 0)
615			Eioccc();
616		if (strlcmp("ifdef", keyword, kwlen) == 0 ||
617		    strlcmp("ifndef", keyword, kwlen) == 0) {
618			cp = skipcomment(cp);
619			if ((cursym = findsym(cp)) < 0)
620				retval = LT_IF;
621			else {
622				retval = (keyword[2] == 'n')
623				    ? LT_FALSE : LT_TRUE;
624				if (value[cursym] == NULL)
625					retval = (retval == LT_TRUE)
626					    ? LT_FALSE : LT_TRUE;
627				if (ignore[cursym])
628					retval = (retval == LT_TRUE)
629					    ? LT_TRUEI : LT_FALSEI;
630			}
631			cp = skipsym(cp);
632		} else if (strlcmp("if", keyword, kwlen) == 0)
633			retval = ifeval(&cp);
634		else if (strlcmp("elif", keyword, kwlen) == 0)
635			retval = ifeval(&cp) - LT_IF + LT_ELIF;
636		else if (strlcmp("else", keyword, kwlen) == 0)
637			retval = LT_ELSE;
638		else if (strlcmp("endif", keyword, kwlen) == 0)
639			retval = LT_ENDIF;
640		else {
641			linestate = LS_DIRTY;
642			retval = LT_PLAIN;
643		}
644		cp = skipcomment(cp);
645		if (*cp != '\0') {
646			linestate = LS_DIRTY;
647			if (retval == LT_TRUE || retval == LT_FALSE ||
648			    retval == LT_TRUEI || retval == LT_FALSEI)
649				retval = LT_IF;
650			if (retval == LT_ELTRUE || retval == LT_ELFALSE)
651				retval = LT_ELIF;
652		}
653		if (retval != LT_PLAIN && (wascomment || incomment)) {
654			retval += LT_DODGY;
655			if (incomment)
656				linestate = LS_DIRTY;
657		}
658	}
659	if (linestate == LS_DIRTY) {
660		while (*cp != '\0')
661			cp = skipcomment(cp + 1);
662	}
663	debug("parser %s comment %s line",
664	    comment_name[incomment], linestate_name[linestate]);
665	return (retval);
666}
667
/*
 * The binary operators understood by the expression evaluator.  Each
 * yields 1 or 0.  Both operands are always evaluated before the
 * operator is applied, so adding division would first require
 * short-circuiting booleans to avoid dividing by zero.
 */
static int
op_lt(int a, int b)
{
	return a < b;
}

static int
op_gt(int a, int b)
{
	return a > b;
}

static int
op_le(int a, int b)
{
	return a <= b;
}

static int
op_ge(int a, int b)
{
	return a >= b;
}

static int
op_eq(int a, int b)
{
	return a == b;
}

static int
op_ne(int a, int b)
{
	return a != b;
}

static int
op_or(int a, int b)
{
	return a || b;
}

static int
op_and(int a, int b)
{
	return a && b;
}
681
682/*
683 * An evaluation function takes three arguments, as follows: (1) a pointer to
684 * an element of the precedence table which lists the operators at the current
685 * level of precedence; (2) a pointer to an integer which will receive the
686 * value of the expression; and (3) a pointer to a char* that points to the
687 * expression to be evaluated and that is updated to the end of the expression
688 * when evaluation is complete. The function returns LT_FALSE if the value of
689 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
690 * expression could not be evaluated.
691 */
692struct ops;
693
694typedef Linetype eval_fn(const struct ops *, int *, const char **);
695
696static eval_fn eval_table, eval_unary;
697
698/*
699 * The precedence table. Expressions involving binary operators are evaluated
700 * in a table-driven way by eval_table. When it evaluates a subexpression it
701 * calls the inner function with its first argument pointing to the next
702 * element of the table. Innermost expressions have special non-table-driven
703 * handling.
704 */
705static const struct ops {
706	eval_fn *inner;
707	struct op {
708		const char *str;
709		int (*fn)(int, int);
710	} op[5];
711} eval_ops[] = {
712	{ eval_table, { { "||", op_or } } },
713	{ eval_table, { { "&&", op_and } } },
714	{ eval_table, { { "==", op_eq },
715			{ "!=", op_ne } } },
716	{ eval_unary, { { "<=", op_le },
717			{ ">=", op_ge },
718			{ "<", op_lt },
719			{ ">", op_gt } } }
720};
721
722/*
723 * Function for evaluating the innermost parts of expressions,
724 * viz. !expr (expr) defined(symbol) symbol number
725 * We reset the keepthis flag when we find a non-constant subexpression.
726 */
727static Linetype
728eval_unary(const struct ops *ops, int *valp, const char **cpp)
729{
730	const char *cp;
731	char *ep;
732	int sym;
733
734	cp = skipcomment(*cpp);
735	if (*cp == '!') {
736		debug("eval%td !", ops - eval_ops);
737		cp++;
738		if (eval_unary(ops, valp, &cp) == LT_IF)
739			return (LT_IF);
740		*valp = !*valp;
741	} else if (*cp == '(') {
742		cp++;
743		debug("eval%td (", ops - eval_ops);
744		if (eval_table(eval_ops, valp, &cp) == LT_IF)
745			return (LT_IF);
746		cp = skipcomment(cp);
747		if (*cp++ != ')')
748			return (LT_IF);
749	} else if (isdigit((unsigned char)*cp)) {
750		debug("eval%td number", ops - eval_ops);
751		*valp = strtol(cp, &ep, 0);
752		cp = skipsym(cp);
753	} else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
754		cp = skipcomment(cp+7);
755		debug("eval%td defined", ops - eval_ops);
756		if (*cp++ != '(')
757			return (LT_IF);
758		cp = skipcomment(cp);
759		sym = findsym(cp);
760		if (sym < 0 || symlist)
761			return (LT_IF);
762		*valp = (value[sym] != NULL);
763		cp = skipsym(cp);
764		cp = skipcomment(cp);
765		if (*cp++ != ')')
766			return (LT_IF);
767		keepthis = false;
768	} else if (!endsym(*cp)) {
769		debug("eval%td symbol", ops - eval_ops);
770		sym = findsym(cp);
771		if (sym < 0 || symlist)
772			return (LT_IF);
773		if (value[sym] == NULL)
774			*valp = 0;
775		else {
776			*valp = strtol(value[sym], &ep, 0);
777			if (*ep != '\0' || ep == value[sym])
778				return (LT_IF);
779		}
780		cp = skipsym(cp);
781		keepthis = false;
782	} else {
783		debug("eval%td bad expr", ops - eval_ops);
784		return (LT_IF);
785	}
786
787	*cpp = cp;
788	debug("eval%td = %d", ops - eval_ops, *valp);
789	return (*valp ? LT_TRUE : LT_FALSE);
790}
791
792/*
793 * Table-driven evaluation of binary operators.
794 */
795static Linetype
796eval_table(const struct ops *ops, int *valp, const char **cpp)
797{
798	const struct op *op;
799	const char *cp;
800	int val;
801
802	debug("eval%td", ops - eval_ops);
803	cp = *cpp;
804	if (ops->inner(ops+1, valp, &cp) == LT_IF)
805		return (LT_IF);
806	for (;;) {
807		cp = skipcomment(cp);
808		for (op = ops->op; op->str != NULL; op++)
809			if (strncmp(cp, op->str, strlen(op->str)) == 0)
810				break;
811		if (op->str == NULL)
812			break;
813		cp += strlen(op->str);
814		debug("eval%td %s", ops - eval_ops, op->str);
815		if (ops->inner(ops+1, &val, &cp) == LT_IF)
816			return (LT_IF);
817		*valp = op->fn(*valp, val);
818	}
819
820	*cpp = cp;
821	debug("eval%td = %d", ops - eval_ops, *valp);
822	return (*valp ? LT_TRUE : LT_FALSE);
823}
824
825/*
826 * Evaluate the expression on a #if or #elif line. If we can work out
827 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
828 * return just a generic LT_IF.
829 */
830static Linetype
831ifeval(const char **cpp)
832{
833	int ret;
834	int val;
835
836	debug("eval %s", *cpp);
837	keepthis = killconsts ? false : true;
838	ret = eval_table(eval_ops, &val, cpp);
839	debug("eval = %d", val);
840	return (keepthis ? LT_IF : ret);
841}
842
843/*
844 * Skip over comments and stop at the next character position that is
845 * not whitespace. Between calls we keep the comment state in the
846 * global variable incomment, and we also adjust the global variable
847 * linestate when we see a newline.
848 * XXX: doesn't cope with the buffer splitting inside a state transition.
849 */
850static const char *
851skipcomment(const char *cp)
852{
853	if (text || ignoring[depth]) {
854		for (; isspace((unsigned char)*cp); cp++)
855			if (*cp == '\n')
856				linestate = LS_START;
857		return (cp);
858	}
859	while (*cp != '\0')
860		/* don't reset to LS_START after a line continuation */
861		if (strncmp(cp, "\\\n", 2) == 0)
862			cp += 2;
863		else switch (incomment) {
864		case NO_COMMENT:
865			if (strncmp(cp, "/\\\n", 3) == 0) {
866				incomment = STARTING_COMMENT;
867				cp += 3;
868			} else if (strncmp(cp, "/*", 2) == 0) {
869				incomment = C_COMMENT;
870				cp += 2;
871			} else if (strncmp(cp, "//", 2) == 0) {
872				incomment = CXX_COMMENT;
873				cp += 2;
874			} else if (strncmp(cp, "\n", 1) == 0) {
875				linestate = LS_START;
876				cp += 1;
877			} else if (strchr(" \t", *cp) != NULL) {
878				cp += 1;
879			} else
880				return (cp);
881			continue;
882		case CXX_COMMENT:
883			if (strncmp(cp, "\n", 1) == 0) {
884				incomment = NO_COMMENT;
885				linestate = LS_START;
886			}
887			cp += 1;
888			continue;
889		case C_COMMENT:
890			if (strncmp(cp, "*\\\n", 3) == 0) {
891				incomment = FINISHING_COMMENT;
892				cp += 3;
893			} else if (strncmp(cp, "*/", 2) == 0) {
894				incomment = NO_COMMENT;
895				cp += 2;
896			} else
897				cp += 1;
898			continue;
899		case STARTING_COMMENT:
900			if (*cp == '*') {
901				incomment = C_COMMENT;
902				cp += 1;
903			} else if (*cp == '/') {
904				incomment = CXX_COMMENT;
905				cp += 1;
906			} else {
907				incomment = NO_COMMENT;
908				linestate = LS_DIRTY;
909			}
910			continue;
911		case FINISHING_COMMENT:
912			if (*cp == '/') {
913				incomment = NO_COMMENT;
914				cp += 1;
915			} else
916				incomment = C_COMMENT;
917			continue;
918		default:
919			abort(); /* bug */
920		}
921	return (cp);
922}
923
/*
 * Skip over an identifier: return a pointer to the first character at
 * or after cp that cannot be part of a symbol.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
934
935/*
936 * Look for the symbol in the symbol table. If it is found, we return
937 * the symbol table index, else we return -1.
938 */
939static int
940findsym(const char *str)
941{
942	const char *cp;
943	int symind;
944
945	cp = skipsym(str);
946	if (cp == str)
947		return (-1);
948	if (symlist)
949		printf("%.*s\n", (int)(cp-str), str);
950	for (symind = 0; symind < nsyms; ++symind) {
951		if (strlcmp(symname[symind], str, cp-str) == 0) {
952			debug("findsym %s %s", symname[symind],
953			    value[symind] ? value[symind] : "");
954			return (symind);
955		}
956	}
957	return (-1);
958}
959
960/*
961 * Add a symbol to the symbol table.
962 */
963static void
964addsym(bool ignorethis, bool definethis, char *sym)
965{
966	int symind;
967	char *val;
968
969	symind = findsym(sym);
970	if (symind < 0) {
971		if (nsyms >= MAXSYMS)
972			errx(2, "too many symbols");
973		symind = nsyms++;
974	}
975	symname[symind] = sym;
976	ignore[symind] = ignorethis;
977	val = sym + (skipsym(sym) - sym);
978	if (definethis) {
979		if (*val == '=') {
980			value[symind] = val+1;
981			*val = '\0';
982		} else if (*val == '\0')
983			value[symind] = "";
984		else
985			usage();
986	} else {
987		if (*val != '\0')
988			usage();
989		value[symind] = NULL;
990	}
991}
992
/*
 * Compare the string s with the first n characters of t.
 *
 * Returns zero when s is exactly those n characters, i.e. they match
 * and s ends there.  A mismatch returns the difference between the
 * characters, taken as unsigned char; if the characters match but s is
 * longer, the next character of s is returned.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	for (; n > 0 && *t != '\0'; n--, s++, t++) {
		if (*s != *t)
			return ((unsigned char)*s - (unsigned char)*t);
	}
	return ((unsigned char)*s);
}
1007
1008/*
1009 * Diagnostics.
1010 */
1011static void
1012debug(const char *msg, ...)
1013{
1014	va_list ap;
1015
1016	if (debugging) {
1017		va_start(ap, msg);
1018		vwarnx(msg, ap);
1019		va_end(ap);
1020	}
1021}
1022
1023static void
1024error(const char *msg)
1025{
1026	if (depth == 0)
1027		warnx("%s: %d: %s", filename, linenum, msg);
1028	else
1029		warnx("%s: %d: %s (#if line %d depth %d)",
1030		    filename, linenum, msg, stifline[depth], depth);
1031	fclose(output);
1032	if (overwriting) {
1033		unlink(tmpname);
1034		errx(2, "%s unchanged", ofilename);
1035	}
1036	errx(2, "output may be truncated");
1037}
1038