1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2002 - 2015 Tony Finch <dot@dotat.at>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28/*
29 * unifdef - remove ifdef'ed lines
30 *
31 * This code was derived from software contributed to Berkeley by Dave Yost.
32 * It was rewritten to support ANSI C by Tony Finch. The original version
33 * of unifdef carried the 4-clause BSD copyright licence. None of its code
34 * remains in this version (though some of the names remain) so it now
35 * carries a more liberal licence.
36 *
37 *  Wishlist:
38 *      provide an option which will append the name of the
39 *        appropriate symbol after #else's and #endif's
40 *      provide an option which will check symbols after
41 *        #else's and #endif's to see that they match their
42 *        corresponding #ifdef or #ifndef
43 *
44 *   These require better buffer handling, which would also make
45 *   it possible to handle all "dodgy" directives correctly.
46 */
47
48#include <sys/param.h>
49#include <sys/stat.h>
50#include <sys/tree.h>
51
52#include <assert.h>
53#include <ctype.h>
54#include <err.h>
55#include <stdarg.h>
56#include <stdbool.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <unistd.h>
61
62static const char copyright[] =
63    "@(#) $Version: unifdef-2.11 $\n"
64    "@(#) $FreeBSD$\n"
65    "@(#) $Author: Tony Finch (dot@dotat.at) $\n"
66    "@(#) $URL: https://dotat.at/prog/unifdef $\n"
67;
68
/*
 * Types of input lines, as returned by parseline().
 *
 * The order of the enumerators is significant:
 *  - LT_IF..LT_FALSE and LT_ELIF..LT_ELFALSE are parallel runs, so
 *    linetype_if2elif() can convert one to the other by adding an offset;
 *  - adding LT_DODGY to any of LT_TRUEI..LT_ENDIF yields the "dodgy"
 *    variant of that type (see linetype_2dodgy());
 *  - the values index linetype_name[] and the columns of trans_table[].
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_ERROR,		/* unevaluable #if */
	LT_COUNT		/* number of line types; second dimension of trans_table[] */
} Linetype;
88
/* Printable name of each Linetype, in enumerator order; used by debug(). */
static char const * const linetype_name[] = {
	/* directives that fit on one line */
	"TRUEI",
	"FALSEI",
	"IF",
	"TRUE",
	"FALSE",
	"ELIF",
	"ELTRUE",
	"ELFALSE",
	"ELSE",
	"ENDIF",
	/* the same directives with the LT_DODGY offset applied */
	"DODGY TRUEI",
	"DODGY FALSEI",
	"DODGY IF",
	"DODGY TRUE",
	"DODGY FALSE",
	"DODGY ELIF",
	"DODGY ELTRUE",
	"DODGY ELFALSE",
	"DODGY ELSE",
	"DODGY ENDIF",
	/* everything else */
	"PLAIN",
	"EOF",
	"ERROR"
};
98
/*
 * Convert an #if line type (LT_IF, LT_TRUE, LT_FALSE) into the matching
 * #elif line type. This relies on the two runs of enumerators being
 * declared in the same order. The argument is parenthesized so that any
 * expression (for example a conditional) may be passed safely.
 */
#define linetype_if2elif(lt) ((Linetype)((lt) - LT_IF + LT_ELIF))
/* Convert a line type in LT_TRUEI..LT_ENDIF into its "dodgy" variant. */
#define linetype_2dodgy(lt) ((Linetype)((lt) + LT_DODGY))
101
/*
 * State of #if processing for one nesting level. One value is stored per
 * level in ifstate[]; the values index the rows of trans_table[] and the
 * entries of ifstate_name[], so the order here must match both.
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT		/* number of states; first dimension of trans_table[] */
} Ifstate;
116
/* Printable name of each Ifstate, in enumerator order; used by debug(). */
static char const * const ifstate_name[] = {
	"OUTSIDE",
	"FALSE_PREFIX",
	"TRUE_PREFIX",
	"PASS_MIDDLE",
	"FALSE_MIDDLE",
	"TRUE_MIDDLE",
	"PASS_ELSE",
	"FALSE_ELSE",
	"TRUE_ELSE",
	"FALSE_TRAILER"
};
123
/*
 * State of the comment parser, kept in the global incomment.
 * NO_COMMENT is zero so that incomment can be tested as a boolean
 * (as done() does); the values also index comment_name[].
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;
134
/* Printable name of each Comment_state, in enumerator order. */
static char const * const comment_name[] = {
	"NO",
	"C",
	"CXX",
	"STARTING",
	"FINISHING",
	"CHAR",
	"STRING"
};
138
/*
 * State of the preprocessor line parser, kept in the global linestate
 * between calls to parseline(). The values index linestate_name[].
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;
145
/* Printable name of each Line_state, in enumerator order. */
static char const * const linestate_name[] = {
	"START",
	"HASH",
	"DIRTY"
};
149
/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 *
 * MAXDEPTH dimensions the per-level arrays ifstate[], ignoring[] and
 * stifline[]; nest() reports an error instead of going past it.
 * MAXLINE is the part of the tline[] buffer that input is read into.
 */
#define	MAXDEPTH        64			/* maximum #if nesting */
#define	MAXLINE         4096			/* maximum length of line */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 * tline[] is declared with MAXLINE+EDITSLOP bytes, and keywordedit()
 * bounds its write by the end of that whole buffer.
 */
#define	EDITSLOP        10
161
/*
 * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
 * parameters.
 *
 * One node of the symbol tree (macro_tree), ordered by name.
 */
struct macro {
	RB_ENTRY(macro)	entry;		/* red-black tree linkage */
	const char	*name;		/* symbol name; the tree key (see macro_cmp()) */
	const char	*value;		/* parseline() treats NULL as "not defined" */
	bool		ignore;		/* -iDsym or -iUsym */
};
172
173static int
174macro_cmp(struct macro *a, struct macro *b)
175{
176	return (strcmp(a->name, b->name));
177}
178
/*
 * Red-black tree of struct macro nodes, keyed by name through macro_cmp().
 * RB_GENERATE_STATIC() emits the file-local tree functions for the
 * MACROMAP tree type (see <sys/tree.h>).
 */
static RB_HEAD(MACROMAP, macro) macro_tree = RB_INITIALIZER(&macro_tree);
RB_GENERATE_STATIC(MACROMAP, macro, entry, macro_cmp);
181
/*
 * Globals.
 *
 * The first group holds the command-line options set by main(); the
 * rest is per-file and per-line processing state.
 */

static bool             compblank;		/* -B: compress blank lines */
static bool             lnblank;		/* -b: blank deleted lines */
static bool             complement;		/* -c: do the complement */
static bool             debugging;		/* -d: debugging reports */
static bool             inplace;		/* -m: modify in place */
static bool             iocccok;		/* -e: fewer IOCCC errors */
static bool             strictlogic;		/* -K: keep ambiguous #ifs */
static bool             killconsts;		/* -k: eval constant #ifs */
static bool             lnnum;			/* -n: add #line directives */
static bool             symlist;		/* -s: output symbol list */
static bool             symdepth;		/* -S: output symbol depth */
static bool             text;			/* -t: this is a text file */

static FILE            *input;			/* input file pointer */
static const char      *filename;		/* input file name */
static int              linenum;		/* current line number */
static const char      *linefile;		/* file name for #line */
static FILE            *output;			/* output file pointer */
static const char      *ofilename;		/* output file name */
static const char      *backext;		/* backup extension */
static char            *tempname;		/* avoid splatting input */

static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
static char            *keyword;		/* used for editing #elif's */

/*
 * When processing a file, the output's newline style will match the
 * input's, and unifdef correctly handles CRLF or LF endings whatever
 * the platform's native style. The stdio streams are opened in binary
 * mode to accommodate platforms whose native newline style is CRLF.
 * When the output isn't a processed input file (when it is error /
 * debug / diagnostic messages) then unifdef uses native line endings.
 */

static const char      *newline;		/* input file format */
static const char       newline_unix[] = "\n";
static const char       newline_crlf[] = "\r\n";

static Comment_state    incomment;		/* comment parser state */
static Line_state       linestate;		/* #if line parser state */
static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
static bool             ignoring[MAXDEPTH];	/* ignore comments state */
static int              stifline[MAXDEPTH];	/* start of current #if */
static int              depth;			/* current #if nesting */
static int              delcount;		/* count of deleted lines */
static unsigned         blankcount;		/* count of blank lines */
static unsigned         blankmax;		/* maximum recent blankcount */
/*
 * NOTE(review): "constexpr" is a keyword in C23, so this declaration does
 * not compile in that language mode. Renaming it needs a matching change
 * at every use elsewhere in the file.
 */
static bool             constexpr;		/* constant #if expression */
static bool             zerosyms;		/* to format symdepth output */
static bool             firstsym;		/* ditto */

static int              exitmode;		/* exit status mode */
static int              exitstat;		/* program exit status */
static bool             altered;		/* was this file modified? */
240
/*
 * Forward declarations of file-local functions, in alphabetical order.
 * Helpers that are defined before their first use (for example
 * mktempmode() and synopsis()) are not listed here.
 */
static void             addsym1(bool, bool, char *);
static void             addsym2(bool, const char *, const char *);
static char            *astrcat(const char *, const char *);
static void             cleantemp(void);
static void             closeio(void);
static void             debug(const char *, ...);
static void             debugsym(const char *, const struct macro *);
static bool             defundef(void);
static void             defundefile(const char *);
static void             done(void);
static void             error(const char *);
static struct macro    *findsym(const char **);
static void             flushline(bool);
static void             hashline(void);
static void             help(void);
static Linetype         ifeval(const char **);
static void             ignoreoff(void);
static void             ignoreon(void);
static void             indirectsym(void);
static void             keywordedit(const char *);
static const char      *matchsym(const char *, const char *);
static void             nest(void);
static Linetype         parseline(void);
static void             process(void);
static void             processinout(const char *, const char *);
static const char      *skipargs(const char *);
static const char      *skipcomment(const char *);
static const char      *skiphash(void);
static const char      *skipline(const char *);
static const char      *skipsym(const char *);
static void             state(Ifstate);
static void             unnest(void);
static void             usage(void);
static void             version(void);
static const char      *xstrdup(const char *, const char *);
276
/*
 * True if character c cannot be part of an identifier, i.e. it is neither
 * alphanumeric nor an underscore. The argument is parenthesized so that an
 * expression can be passed safely; note that it is still evaluated twice,
 * so it must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
278
/*
 * Create a temporary file from the mkstemp() template tmp (which is
 * overwritten with the name actually used), give it the permission bits
 * of mode, and return it as a stream opened for binary writing.
 * Exits with status 2 if the file cannot be created or chmod'ed; returns
 * NULL if fdopen() fails.
 */
static FILE *
mktempmode(char *tmp, int mode)
{
	/* only the rwx bits for user/group/other are carried over */
	const int perms = mode & (S_IRWXU|S_IRWXG|S_IRWXO);
	const int fd = mkstemp(tmp);

	if (fd < 0)
		err(2, "can't create %s", tmp);
	if (fchmod(fd, perms) < 0)
		err(2, "can't fchmod %s mode=0o%o", tmp, perms);
	return (fdopen(fd, "wb"));
}
293
294/*
295 * The main program.
296 */
297int
298main(int argc, char *argv[])
299{
300	int opt;
301
302	while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1)
303		switch (opt) {
304		case 'i': /* treat stuff controlled by these symbols as text */
305			/*
306			 * For strict backwards-compatibility the U or D
307			 * should be immediately after the -i but it doesn't
308			 * matter much if we relax that requirement.
309			 */
310			opt = *optarg++;
311			if (opt == 'D')
312				addsym1(true, true, optarg);
313			else if (opt == 'U')
314				addsym1(true, false, optarg);
315			else
316				usage();
317			break;
318		case 'D': /* define a symbol */
319			addsym1(false, true, optarg);
320			break;
321		case 'U': /* undef a symbol */
322			addsym1(false, false, optarg);
323			break;
324		case 'I': /* no-op for compatibility with cpp */
325			break;
326		case 'b': /* blank deleted lines instead of omitting them */
327		case 'l': /* backwards compatibility */
328			lnblank = true;
329			break;
330		case 'B': /* compress blank lines around removed section */
331			compblank = true;
332			break;
333		case 'c': /* treat -D as -U and vice versa */
334			complement = true;
335			break;
336		case 'd':
337			debugging = true;
338			break;
339		case 'e': /* fewer errors from dodgy lines */
340			iocccok = true;
341			break;
342		case 'f': /* definitions file */
343			defundefile(optarg);
344			break;
345		case 'h':
346			help();
347			break;
348		case 'K': /* keep ambiguous #ifs */
349			strictlogic = true;
350			break;
351		case 'k': /* process constant #ifs */
352			killconsts = true;
353			break;
354		case 'm': /* modify in place */
355			inplace = true;
356			break;
357		case 'M': /* modify in place and keep backup */
358			inplace = true;
359			if (strlen(optarg) > 0)
360				backext = optarg;
361			break;
362		case 'n': /* add #line directive after deleted lines */
363			lnnum = true;
364			break;
365		case 'o': /* output to a file */
366			ofilename = optarg;
367			break;
368		case 's': /* only output list of symbols that control #ifs */
369			symlist = true;
370			break;
371		case 'S': /* list symbols with their nesting depth */
372			symlist = symdepth = true;
373			break;
374		case 't': /* don't parse C comments */
375			text = true;
376			break;
377		case 'V':
378			version();
379			break;
380		case 'x':
381			exitmode = atoi(optarg);
382			if(exitmode < 0 || exitmode > 2)
383				usage();
384			break;
385		default:
386			usage();
387		}
388	argc -= optind;
389	argv += optind;
390	if (compblank && lnblank)
391		errx(2, "-B and -b are mutually exclusive");
392	if (symlist && (ofilename != NULL || inplace || argc > 1))
393		errx(2, "-s only works with one input file");
394	if (argc > 1 && ofilename != NULL)
395		errx(2, "-o cannot be used with multiple input files");
396	if (argc > 1 && !inplace)
397		errx(2, "multiple input files require -m or -M");
398	if (argc == 0 && inplace)
399		errx(2, "-m requires an input file");
400	if (argc == 0)
401		argc = 1;
402	if (argc == 1 && !inplace && ofilename == NULL)
403		ofilename = "-";
404	indirectsym();
405
406	atexit(cleantemp);
407	if (ofilename != NULL)
408		processinout(*argv, ofilename);
409	else while (argc-- > 0) {
410		processinout(*argv, *argv);
411		argv++;
412	}
413	switch(exitmode) {
414	case(0): exit(exitstat);
415	case(1): exit(!exitstat);
416	case(2): exit(0);
417	default: abort(); /* bug */
418	}
419}
420
421/*
422 * File logistics.
423 */
424static void
425processinout(const char *ifn, const char *ofn)
426{
427	struct stat st;
428
429	if (ifn == NULL || strcmp(ifn, "-") == 0) {
430		filename = "[stdin]";
431		linefile = NULL;
432		input = stdin;
433	} else {
434		filename = ifn;
435		linefile = ifn;
436		input = fopen(ifn, "rb");
437		if (input == NULL)
438			err(2, "can't open %s", ifn);
439	}
440	if (strcmp(ofn, "-") == 0) {
441		output = stdout;
442		process();
443		return;
444	}
445	if (stat(ofn, &st) < 0) {
446		output = fopen(ofn, "wb");
447		if (output == NULL)
448			err(2, "can't create %s", ofn);
449		process();
450		return;
451	}
452
453	tempname = astrcat(ofn, ".XXXXXX");
454	output = mktempmode(tempname, st.st_mode);
455	if (output == NULL)
456		err(2, "can't create %s", tempname);
457
458	process();
459
460	if (backext != NULL) {
461		char *backname = astrcat(ofn, backext);
462		if (rename(ofn, backname) < 0)
463			err(2, "can't rename \"%s\" to \"%s\"", ofn, backname);
464		free(backname);
465	}
466	/* leave file unmodified if unifdef made no changes */
467	if (!altered && backext == NULL) {
468		if (remove(tempname) < 0)
469			warn("can't remove \"%s\"", tempname);
470	} else if (rename(tempname, ofn) < 0)
471		err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn);
472	free(tempname);
473	tempname = NULL;
474}
475
476/*
477 * For cleaning up if there is an error.
478 */
479static void
480cleantemp(void)
481{
482	if (tempname != NULL)
483		remove(tempname);
484}
485
486/*
487 * Self-identification functions.
488 */
489
490static void
491version(void)
492{
493	const char *c = copyright;
494	for (;;) {
495		while (*++c != '$')
496			if (*c == '\0')
497				exit(0);
498		while (*++c != '$')
499			putc(*c, stderr);
500		putc('\n', stderr);
501	}
502}
503
/* Write the two-line command synopsis to the stream fp. */
static void
synopsis(FILE *fp)
{
	static const char summary[] =
	    "usage:	unifdef [-bBcdehKkmnsStV] [-x{012}] [-Mext] [-opath] \\\n"
	    "		[-[i]Dsym[=val]] [-[i]Usym] [-fpath] ... [file] ...\n";

	fputs(summary, fp);
}
511
/* Report a command-line error: print the synopsis to stderr, exit 2. */
static void
usage(void)
{
	FILE *const errs = stderr;

	synopsis(errs);
	exit(2);
}
518
/*
 * Handle -h: print the synopsis followed by a one-line description of
 * every option to stdout, then exit with status 0.
 */
static void
help(void)
{
	static const char options[] =
	    "	-Dsym=val  define preprocessor symbol with given value\n"
	    "	-Dsym      define preprocessor symbol with value 1\n"
	    "	-Usym	   preprocessor symbol is undefined\n"
	    "	-iDsym=val \\  ignore C strings and comments\n"
	    "	-iDsym      ) in sections controlled by these\n"
	    "	-iUsym	   /  preprocessor symbols\n"
	    "	-fpath	file containing #define and #undef directives\n"
	    "	-b	blank lines instead of deleting them\n"
	    "	-B	compress blank lines around deleted section\n"
	    "	-c	complement (invert) keep vs. delete\n"
	    "	-d	debugging mode\n"
	    "	-e	ignore multiline preprocessor directives\n"
	    "	-h	print help\n"
	    "	-Ipath	extra include file path (ignored)\n"
	    "	-K	disable && and || short-circuiting\n"
	    "	-k	process constant #if expressions\n"
	    "	-Mext	modify in place and keep backups\n"
	    "	-m	modify input files in place\n"
	    "	-n	add #line directives to output\n"
	    "	-opath	output file name\n"
	    "	-S	list #if control symbols with nesting\n"
	    "	-s	list #if control symbols\n"
	    "	-t	ignore C strings and comments\n"
	    "	-V	print version\n"
	    "	-x{012}	exit status mode\n";

	synopsis(stdout);
	fputs(options, stdout);
	exit(0);
}
552
553/*
554 * A state transition function alters the global #if processing state
555 * in a particular way. The table below is indexed by the current
556 * processing state and the type of the current line.
557 *
558 * Nesting is handled by keeping a stack of states; some transition
559 * functions increase or decrease the depth. They also maintain the
560 * ignore state on a stack. In some complicated cases they have to
561 * alter the preprocessor directive, as follows.
562 *
563 * When we have processed a group that starts off with a known-false
564 * #if/#elif sequence (which has therefore been deleted) followed by a
565 * #elif that we don't understand and therefore must keep, we edit the
566 * latter into a #if to keep the nesting correct. We use memcpy() to
567 * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.
568 *
569 * When we find a true #elif in a group, the following block will
570 * always be kept and the rest of the sequence after the next #elif or
571 * #else will be discarded. We edit the #elif into a #else and the
572 * following directive to #endif since this has the desired behaviour.
573 *
574 * "Dodgy" directives are split across multiple lines, the most common
575 * example being a multi-line comment hanging off the right of the
576 * directive. We can handle them correctly only if there is no change
577 * from printing to dropping (or vice versa) caused by that directive.
578 * If the directive is the first of a group we have a choice between
579 * failing with an error, or passing it through unchanged instead of
580 * evaluating it. The latter is not the default to avoid questions from
581 * users about unifdef unexpectedly leaving behind preprocessor directives.
582 */
583typedef void state_fn(void);
584
585/* report an error */
586static void Eelif (void) { error("Inappropriate #elif"); }
587static void Eelse (void) { error("Inappropriate #else"); }
588static void Eendif(void) { error("Inappropriate #endif"); }
589static void Eeof  (void) { error("Premature EOF"); }
590static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
591/* plain line handling */
592static void print (void) { flushline(true); }
593static void drop  (void) { flushline(false); }
594/* output lacks group's start line */
595static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
596static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
597static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
598/* print/pass this block */
599static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
600static void Pelse (void) { print();              state(IS_PASS_ELSE); }
601static void Pendif(void) { print(); unnest(); }
602/* discard this block */
603static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
604static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
605static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
606static void Dendif(void) { drop();  unnest(); }
607/* first line of group */
608static void Fdrop (void) { nest();  Dfalse(); }
609static void Fpass (void) { nest();  Pelif(); }
610static void Ftrue (void) { nest();  Strue(); }
611static void Ffalse(void) { nest();  Sfalse(); }
612/* variable pedantry for obfuscated lines */
613static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
614static void Oif   (void) { if (!iocccok) Eioccc(); Fpass(); }
615static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
616/* ignore comments in this block */
617static void Idrop (void) { Fdrop();  ignoreon(); }
618static void Itrue (void) { Ftrue();  ignoreon(); }
619static void Ifalse(void) { Ffalse(); ignoreon(); }
620/* modify this line */
621static void Mpass (void) { memcpy(keyword, "if  ", 4); Pelif(); }
622static void Mtrue (void) { keywordedit("else");  state(IS_TRUE_MIDDLE); }
623static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
624static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
625
/*
 * The state transition table. process() looks up
 * trans_table[ifstate[depth]][lineval] and calls the result, so rows are
 * in Ifstate order and columns are in Linetype order. Each row is written
 * as three lines: the ten single-line directive types, the same ten with
 * the LT_DODGY offset, and finally PLAIN, EOF and ERROR (the column key is
 * repeated in the comment at the foot of the table).
 */
static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
/* IS_OUTSIDE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
  print, done,  abort },
/* IS_FALSE_PREFIX */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_PREFIX */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  print, Eeof,  abort },
/* IS_PASS_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
  print, Eeof,  abort },
/* IS_FALSE_MIDDLE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
  print, Eeof,  abort },
/* IS_PASS_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
  print, Eeof,  abort },
/* IS_FALSE_ELSE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
  print, Eeof,  abort },
/* IS_FALSE_TRAILER */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
  drop,  Eeof,  abort }
/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
  PLAIN  EOF    ERROR */
};
671
672/*
673 * State machine utility functions
674 */
675static void
676ignoreoff(void)
677{
678	if (depth == 0)
679		abort(); /* bug */
680	ignoring[depth] = ignoring[depth-1];
681}
682static void
683ignoreon(void)
684{
685	ignoring[depth] = true;
686}
687static void
688keywordedit(const char *replacement)
689{
690	snprintf(keyword, tline + sizeof(tline) - keyword,
691	    "%s%s", replacement, newline);
692	altered = true;
693	print();
694}
695static void
696nest(void)
697{
698	if (depth > MAXDEPTH-1)
699		abort(); /* bug */
700	if (depth == MAXDEPTH-1)
701		error("Too many levels of nesting");
702	depth += 1;
703	stifline[depth] = linenum;
704}
705static void
706unnest(void)
707{
708	if (depth == 0)
709		abort(); /* bug */
710	depth -= 1;
711}
712static void
713state(Ifstate is)
714{
715	ifstate[depth] = is;
716}
717
718/*
719 * The last state transition function. When this is called,
720 * lineval == LT_EOF, so the process() loop will terminate.
721 */
722static void
723done(void)
724{
725	if (incomment)
726		error("EOF in comment");
727	closeio();
728}
729
730/*
731 * Write a line to the output or not, according to command line options.
732 * If writing fails, closeio() will print the error and exit.
733 */
734static void
735flushline(bool keep)
736{
737	if (symlist)
738		return;
739	if (keep ^ complement) {
740		bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
741		if (blankline && compblank && blankcount != blankmax) {
742			delcount += 1;
743			blankcount += 1;
744		} else {
745			if (lnnum && delcount > 0)
746				hashline();
747			if (fputs(tline, output) == EOF)
748				closeio();
749			delcount = 0;
750			blankmax = blankcount = blankline ? blankcount + 1 : 0;
751		}
752	} else {
753		if (lnblank && fputs(newline, output) == EOF)
754			closeio();
755		altered = true;
756		delcount += 1;
757		blankcount = 0;
758	}
759	if (debugging && fflush(output) == EOF)
760		closeio();
761}
762
763/*
764 * Format of #line directives depends on whether we know the input filename.
765 */
766static void
767hashline(void)
768{
769	int e;
770
771	if (linefile == NULL)
772		e = fprintf(output, "#line %d%s", linenum, newline);
773	else
774		e = fprintf(output, "#line %d \"%s\"%s",
775		    linenum, linefile, newline);
776	if (e < 0)
777		closeio();
778}
779
780/*
781 * Flush the output and handle errors.
782 */
783static void
784closeio(void)
785{
786	/* Tidy up after findsym(). */
787	if (symdepth && !zerosyms)
788		printf("\n");
789	if (output != NULL && (ferror(output) || fclose(output) == EOF))
790			err(2, "%s: can't write to output", filename);
791	fclose(input);
792}
793
794/*
795 * The driver for the state machine.
796 */
797static void
798process(void)
799{
800	Linetype lineval = LT_PLAIN;
801	/* When compressing blank lines, act as if the file
802	   is preceded by a large number of blank lines. */
803	blankmax = blankcount = 1000;
804	zerosyms = true;
805	newline = NULL;
806	linenum = 0;
807	altered = false;
808	while (lineval != LT_EOF) {
809		lineval = parseline();
810		trans_table[ifstate[depth]][lineval]();
811		debug("process line %d %s -> %s depth %d",
812		    linenum, linetype_name[lineval],
813		    ifstate_name[ifstate[depth]], depth);
814	}
815	exitstat |= altered;
816}
817
818/*
819 * Parse a line and determine its type. We keep the preprocessor line
820 * parser state between calls in the global variable linestate, with
821 * help from skipcomment().
822 */
823static Linetype
824parseline(void)
825{
826	const char *cp;
827	struct macro *cursym;
828	Linetype retval;
829	Comment_state wascomment;
830
831	wascomment = incomment;
832	cp = skiphash();
833	if (cp == NULL)
834		return (LT_EOF);
835	if (newline == NULL) {
836		if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
837			newline = newline_crlf;
838		else
839			newline = newline_unix;
840	}
841	if (*cp == '\0') {
842		retval = LT_PLAIN;
843		goto done;
844	}
845	keyword = tline + (cp - tline);
846	if ((cp = matchsym("ifdef", keyword)) != NULL ||
847	    (cp = matchsym("ifndef", keyword)) != NULL) {
848		cp = skipcomment(cp);
849		if ((cursym = findsym(&cp)) == NULL)
850			retval = LT_IF;
851		else {
852			retval = (keyword[2] == 'n')
853			    ? LT_FALSE : LT_TRUE;
854			if (cursym->value == NULL)
855				retval = (retval == LT_TRUE)
856				    ? LT_FALSE : LT_TRUE;
857			if (cursym->ignore)
858				retval = (retval == LT_TRUE)
859				    ? LT_TRUEI : LT_FALSEI;
860		}
861	} else if ((cp = matchsym("if", keyword)) != NULL)
862		retval = ifeval(&cp);
863	else if ((cp = matchsym("elif", keyword)) != NULL)
864		retval = linetype_if2elif(ifeval(&cp));
865	else if ((cp = matchsym("else", keyword)) != NULL)
866		retval = LT_ELSE;
867	else if ((cp = matchsym("endif", keyword)) != NULL)
868		retval = LT_ENDIF;
869	else {
870		cp = skipsym(keyword);
871		/* no way can we deal with a continuation inside a keyword */
872		if (strncmp(cp, "\\\r\n", 3) == 0 ||
873		    strncmp(cp, "\\\n", 2) == 0)
874			Eioccc();
875		cp = skipline(cp);
876		retval = LT_PLAIN;
877		goto done;
878	}
879	cp = skipcomment(cp);
880	if (*cp != '\0') {
881		cp = skipline(cp);
882		if (retval == LT_TRUE || retval == LT_FALSE ||
883		    retval == LT_TRUEI || retval == LT_FALSEI)
884			retval = LT_IF;
885		if (retval == LT_ELTRUE || retval == LT_ELFALSE)
886			retval = LT_ELIF;
887	}
888	/* the following can happen if the last line of the file lacks a
889	   newline or if there is too much whitespace in a directive */
890	if (linestate == LS_HASH) {
891		long len = cp - tline;
892		if (fgets(tline + len, MAXLINE - len, input) == NULL) {
893			if (ferror(input))
894				err(2, "can't read %s", filename);
895			/* append the missing newline at eof */
896			strcpy(tline + len, newline);
897			cp += strlen(newline);
898			linestate = LS_START;
899		} else {
900			linestate = LS_DIRTY;
901		}
902	}
903	if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) {
904		retval = linetype_2dodgy(retval);
905		linestate = LS_DIRTY;
906	}
907done:
908	debug("parser line %d state %s comment %s line", linenum,
909	    comment_name[incomment], linestate_name[linestate]);
910	return (retval);
911}
912
913/*
914 * These are the binary operators that are supported by the expression
915 * evaluator.
916 */
917static Linetype op_strict(long *p, long v, Linetype at, Linetype bt) {
918	if(at == LT_IF || bt == LT_IF) return (LT_IF);
919	return (*p = v, v ? LT_TRUE : LT_FALSE);
920}
921static Linetype op_lt(long *p, Linetype at, long a, Linetype bt, long b) {
922	return op_strict(p, a < b, at, bt);
923}
924static Linetype op_gt(long *p, Linetype at, long a, Linetype bt, long b) {
925	return op_strict(p, a > b, at, bt);
926}
927static Linetype op_le(long *p, Linetype at, long a, Linetype bt, long b) {
928	return op_strict(p, a <= b, at, bt);
929}
930static Linetype op_ge(long *p, Linetype at, long a, Linetype bt, long b) {
931	return op_strict(p, a >= b, at, bt);
932}
933static Linetype op_eq(long *p, Linetype at, long a, Linetype bt, long b) {
934	return op_strict(p, a == b, at, bt);
935}
936static Linetype op_ne(long *p, Linetype at, long a, Linetype bt, long b) {
937	return op_strict(p, a != b, at, bt);
938}
939static Linetype op_or(long *p, Linetype at, long a, Linetype bt, long b) {
940	if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE))
941		return (*p = 1, LT_TRUE);
942	return op_strict(p, a || b, at, bt);
943}
944static Linetype op_and(long *p, Linetype at, long a, Linetype bt, long b) {
945	if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE))
946		return (*p = 0, LT_FALSE);
947	return op_strict(p, a && b, at, bt);
948}
949static Linetype op_blsh(long *p, Linetype at, long a, Linetype bt, long b) {
950	return op_strict(p, a << b, at, bt);
951}
952static Linetype op_brsh(long *p, Linetype at, long a, Linetype bt, long b) {
953	return op_strict(p, a >> b, at, bt);
954}
955static Linetype op_add(long *p, Linetype at, long a, Linetype bt, long b) {
956	return op_strict(p, a + b, at, bt);
957}
958static Linetype op_sub(long *p, Linetype at, long a, Linetype bt, long b) {
959	return op_strict(p, a - b, at, bt);
960}
961static Linetype op_mul(long *p, Linetype at, long a, Linetype bt, long b) {
962	return op_strict(p, a * b, at, bt);
963}
964static Linetype op_div(long *p, Linetype at, long a, Linetype bt, long b) {
965	if (bt != LT_TRUE) {
966		debug("eval division by zero");
967		return (LT_ERROR);
968	}
969	return op_strict(p, a / b, at, bt);
970}
971static Linetype op_mod(long *p, Linetype at, long a, Linetype bt, long b) {
972	return op_strict(p, a % b, at, bt);
973}
974static Linetype op_bor(long *p, Linetype at, long a, Linetype bt, long b) {
975	return op_strict(p, a | b, at, bt);
976}
977static Linetype op_bxor(long *p, Linetype at, long a, Linetype bt, long b) {
978	return op_strict(p, a ^ b, at, bt);
979}
980static Linetype op_band(long *p, Linetype at, long a, Linetype bt, long b) {
981	return op_strict(p, a & b, at, bt);
982}
983
/*
 * An evaluation function takes three arguments, as follows: (1) a pointer to
 * an element of the precedence table which lists the operators at the current
 * level of precedence; (2) a pointer to a long which will receive the
 * value of the expression; and (3) a pointer to a char* that points to the
 * expression to be evaluated and that is updated to the end of the expression
 * when evaluation is complete. The function returns LT_FALSE if the value of
 * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
 * depends on an unknown symbol, or LT_ERROR if there is a parse failure (in
 * which case the char* is not updated).
 */
994struct ops;
995
996typedef Linetype eval_fn(const struct ops *, long *, const char **);
997
998static eval_fn eval_table, eval_unary;
999
1000/*
1001 * The precedence table. Expressions involving binary operators are evaluated
1002 * in a table-driven way by eval_table. When it evaluates a subexpression it
1003 * calls the inner function with its first argument pointing to the next
1004 * element of the table. Innermost expressions have special non-table-driven
1005 * handling.
1006 *
1007 * The stop characters help with lexical analysis: an operator is not
1008 * recognized if it is followed by one of the stop characters because
1009 * that would make it a different operator.
1010 */
1011struct op {
1012	const char *str;
1013	Linetype (*fn)(long *, Linetype, long, Linetype, long);
1014	const char *stop;
1015};
/* One precedence level of the expression grammar. */
struct ops {
	eval_fn *inner;		/* evaluates the operands of this level */
	struct op op[5];	/* operators of this level; unused slots have str == NULL */
};
1020static const struct ops eval_ops[] = {
1021	{ eval_table, { { "||", op_or, NULL } } },
1022	{ eval_table, { { "&&", op_and, NULL } } },
1023	{ eval_table, { { "|", op_bor, "|" } } },
1024	{ eval_table, { { "^", op_bxor, NULL } } },
1025	{ eval_table, { { "&", op_band, "&" } } },
1026	{ eval_table, { { "==", op_eq, NULL },
1027			{ "!=", op_ne, NULL } } },
1028	{ eval_table, { { "<=", op_le, NULL },
1029			{ ">=", op_ge, NULL },
1030			{ "<", op_lt, "<=" },
1031			{ ">", op_gt, ">=" } } },
1032	{ eval_table, { { "<<", op_blsh, NULL },
1033			{ ">>", op_brsh, NULL } } },
1034	{ eval_table, { { "+", op_add, NULL },
1035			{ "-", op_sub, NULL } } },
1036	{ eval_unary, { { "*", op_mul, NULL },
1037			{ "/", op_div, NULL },
1038			{ "%", op_mod, NULL } } },
1039};
1040
1041/* Current operator precedence level */
1042static long prec(const struct ops *ops)
1043{
1044	return (ops - eval_ops);
1045}
1046
1047/*
1048 * Function for evaluating the innermost parts of expressions,
1049 * viz. !expr (expr) number defined(symbol) symbol
1050 * We reset the constexpr flag in the last two cases.
1051 */
1052static Linetype
1053eval_unary(const struct ops *ops, long *valp, const char **cpp)
1054{
1055	const char *cp;
1056	char *ep;
1057	struct macro *sym;
1058	bool defparen;
1059	Linetype lt;
1060
1061	cp = skipcomment(*cpp);
1062	if (*cp == '!') {
1063		debug("eval%d !", prec(ops));
1064		cp++;
1065		lt = eval_unary(ops, valp, &cp);
1066		if (lt == LT_ERROR)
1067			return (LT_ERROR);
1068		if (lt != LT_IF) {
1069			*valp = !*valp;
1070			lt = *valp ? LT_TRUE : LT_FALSE;
1071		}
1072	} else if (*cp == '~') {
1073		debug("eval%d ~", prec(ops));
1074		cp++;
1075		lt = eval_unary(ops, valp, &cp);
1076		if (lt == LT_ERROR)
1077			return (LT_ERROR);
1078		if (lt != LT_IF) {
1079			*valp = ~(*valp);
1080			lt = *valp ? LT_TRUE : LT_FALSE;
1081		}
1082	} else if (*cp == '-') {
1083		debug("eval%d -", prec(ops));
1084		cp++;
1085		lt = eval_unary(ops, valp, &cp);
1086		if (lt == LT_ERROR)
1087			return (LT_ERROR);
1088		if (lt != LT_IF) {
1089			*valp = -(*valp);
1090			lt = *valp ? LT_TRUE : LT_FALSE;
1091		}
1092	} else if (*cp == '(') {
1093		cp++;
1094		debug("eval%d (", prec(ops));
1095		lt = eval_table(eval_ops, valp, &cp);
1096		if (lt == LT_ERROR)
1097			return (LT_ERROR);
1098		cp = skipcomment(cp);
1099		if (*cp++ != ')')
1100			return (LT_ERROR);
1101	} else if (isdigit((unsigned char)*cp)) {
1102		debug("eval%d number", prec(ops));
1103		*valp = strtol(cp, &ep, 0);
1104		if (ep == cp)
1105			return (LT_ERROR);
1106		lt = *valp ? LT_TRUE : LT_FALSE;
1107		cp = ep;
1108	} else if (matchsym("defined", cp) != NULL) {
1109		cp = skipcomment(cp+7);
1110		if (*cp == '(') {
1111			cp = skipcomment(cp+1);
1112			defparen = true;
1113		} else {
1114			defparen = false;
1115		}
1116		sym = findsym(&cp);
1117		cp = skipcomment(cp);
1118		if (defparen && *cp++ != ')') {
1119			debug("eval%d defined missing ')'", prec(ops));
1120			return (LT_ERROR);
1121		}
1122		if (sym == NULL) {
1123			debug("eval%d defined unknown", prec(ops));
1124			lt = LT_IF;
1125		} else {
1126			debug("eval%d defined %s", prec(ops), sym->name);
1127			*valp = (sym->value != NULL);
1128			lt = *valp ? LT_TRUE : LT_FALSE;
1129		}
1130		constexpr = false;
1131	} else if (!endsym(*cp)) {
1132		debug("eval%d symbol", prec(ops));
1133		sym = findsym(&cp);
1134		if (sym == NULL) {
1135			lt = LT_IF;
1136			cp = skipargs(cp);
1137		} else if (sym->value == NULL) {
1138			*valp = 0;
1139			lt = LT_FALSE;
1140		} else {
1141			*valp = strtol(sym->value, &ep, 0);
1142			if (*ep != '\0' || ep == sym->value)
1143				return (LT_ERROR);
1144			lt = *valp ? LT_TRUE : LT_FALSE;
1145			cp = skipargs(cp);
1146		}
1147		constexpr = false;
1148	} else {
1149		debug("eval%d bad expr", prec(ops));
1150		return (LT_ERROR);
1151	}
1152
1153	*cpp = cp;
1154	debug("eval%d = %d", prec(ops), *valp);
1155	return (lt);
1156}
1157
1158/*
1159 * Table-driven evaluation of binary operators.
1160 */
1161static Linetype
1162eval_table(const struct ops *ops, long *valp, const char **cpp)
1163{
1164	const struct op *op;
1165	const char *cp;
1166	long val = 0;
1167	Linetype lt, rt;
1168
1169	debug("eval%d", prec(ops));
1170	cp = *cpp;
1171	lt = ops->inner(ops+1, valp, &cp);
1172	if (lt == LT_ERROR)
1173		return (LT_ERROR);
1174	for (;;) {
1175		cp = skipcomment(cp);
1176		for (op = ops->op; op->str != NULL; op++) {
1177			if (strncmp(cp, op->str, strlen(op->str)) == 0) {
1178				/* assume only one-char operators have stop chars */
1179				if (op->stop != NULL && cp[1] != '\0' &&
1180				    strchr(op->stop, cp[1]) != NULL)
1181					continue;
1182				else
1183					break;
1184			}
1185		}
1186		if (op->str == NULL)
1187			break;
1188		cp += strlen(op->str);
1189		debug("eval%d %s", prec(ops), op->str);
1190		rt = ops->inner(ops+1, &val, &cp);
1191		if (rt == LT_ERROR)
1192			return (LT_ERROR);
1193		lt = op->fn(valp, lt, *valp, rt, val);
1194	}
1195
1196	*cpp = cp;
1197	debug("eval%d = %d", prec(ops), *valp);
1198	debug("eval%d lt = %s", prec(ops), linetype_name[lt]);
1199	return (lt);
1200}
1201
1202/*
1203 * Evaluate the expression on a #if or #elif line. If we can work out
1204 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1205 * return just a generic LT_IF.
1206 */
1207static Linetype
1208ifeval(const char **cpp)
1209{
1210	Linetype ret;
1211	long val = 0;
1212
1213	debug("eval %s", *cpp);
1214	constexpr = killconsts ? false : true;
1215	ret = eval_table(eval_ops, &val, cpp);
1216	debug("eval = %d", val);
1217	return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
1218}
1219
1220/*
1221 * Read a line and examine its initial part to determine if it is a
1222 * preprocessor directive. Returns NULL on EOF, or a pointer to a
1223 * preprocessor directive name, or a pointer to the zero byte at the
1224 * end of the line.
1225 */
1226static const char *
1227skiphash(void)
1228{
1229	const char *cp;
1230
1231	linenum++;
1232	if (fgets(tline, MAXLINE, input) == NULL) {
1233		if (ferror(input))
1234			err(2, "can't read %s", filename);
1235		else
1236			return (NULL);
1237	}
1238	cp = skipcomment(tline);
1239	if (linestate == LS_START && *cp == '#') {
1240		linestate = LS_HASH;
1241		return (skipcomment(cp + 1));
1242	} else if (*cp == '\0') {
1243		return (cp);
1244	} else {
1245		return (skipline(cp));
1246	}
1247}
1248
1249/*
1250 * Mark a line dirty and consume the rest of it, keeping track of the
1251 * lexical state.
1252 */
1253static const char *
1254skipline(const char *cp)
1255{
1256	const char *pcp;
1257	if (*cp != '\0')
1258		linestate = LS_DIRTY;
1259	while (*cp != '\0') {
1260		cp = skipcomment(pcp = cp);
1261		if (pcp == cp)
1262			cp++;
1263	}
1264	return (cp);
1265}
1266
1267/*
1268 * Skip over comments, strings, and character literals and stop at the
1269 * next character position that is not whitespace. Between calls we keep
1270 * the comment state in the global variable incomment, and we also adjust
1271 * the global variable linestate when we see a newline.
1272 * XXX: doesn't cope with the buffer splitting inside a state transition.
1273 */
1274static const char *
1275skipcomment(const char *cp)
1276{
1277	if (text || ignoring[depth]) {
1278		for (; isspace((unsigned char)*cp); cp++)
1279			if (*cp == '\n')
1280				linestate = LS_START;
1281		return (cp);
1282	}
1283	while (*cp != '\0')
1284		/* don't reset to LS_START after a line continuation */
1285		if (strncmp(cp, "\\\r\n", 3) == 0)
1286			cp += 3;
1287		else if (strncmp(cp, "\\\n", 2) == 0)
1288			cp += 2;
1289		else switch (incomment) {
1290		case NO_COMMENT:
1291			if (strncmp(cp, "/\\\r\n", 4) == 0) {
1292				incomment = STARTING_COMMENT;
1293				cp += 4;
1294			} else if (strncmp(cp, "/\\\n", 3) == 0) {
1295				incomment = STARTING_COMMENT;
1296				cp += 3;
1297			} else if (strncmp(cp, "/*", 2) == 0) {
1298				incomment = C_COMMENT;
1299				cp += 2;
1300			} else if (strncmp(cp, "//", 2) == 0) {
1301				incomment = CXX_COMMENT;
1302				cp += 2;
1303			} else if (strncmp(cp, "\'", 1) == 0) {
1304				incomment = CHAR_LITERAL;
1305				linestate = LS_DIRTY;
1306				cp += 1;
1307			} else if (strncmp(cp, "\"", 1) == 0) {
1308				incomment = STRING_LITERAL;
1309				linestate = LS_DIRTY;
1310				cp += 1;
1311			} else if (strncmp(cp, "\n", 1) == 0) {
1312				linestate = LS_START;
1313				cp += 1;
1314			} else if (strchr(" \r\t", *cp) != NULL) {
1315				cp += 1;
1316			} else
1317				return (cp);
1318			continue;
1319		case CXX_COMMENT:
1320			if (strncmp(cp, "\n", 1) == 0) {
1321				incomment = NO_COMMENT;
1322				linestate = LS_START;
1323			}
1324			cp += 1;
1325			continue;
1326		case CHAR_LITERAL:
1327		case STRING_LITERAL:
1328			if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
1329			    (incomment == STRING_LITERAL && cp[0] == '\"')) {
1330				incomment = NO_COMMENT;
1331				cp += 1;
1332			} else if (cp[0] == '\\') {
1333				if (cp[1] == '\0')
1334					cp += 1;
1335				else
1336					cp += 2;
1337			} else if (strncmp(cp, "\n", 1) == 0) {
1338				if (incomment == CHAR_LITERAL)
1339					error("Unterminated char literal");
1340				else
1341					error("Unterminated string literal");
1342			} else
1343				cp += 1;
1344			continue;
1345		case C_COMMENT:
1346			if (strncmp(cp, "*\\\r\n", 4) == 0) {
1347				incomment = FINISHING_COMMENT;
1348				cp += 4;
1349			} else if (strncmp(cp, "*\\\n", 3) == 0) {
1350				incomment = FINISHING_COMMENT;
1351				cp += 3;
1352			} else if (strncmp(cp, "*/", 2) == 0) {
1353				incomment = NO_COMMENT;
1354				cp += 2;
1355			} else
1356				cp += 1;
1357			continue;
1358		case STARTING_COMMENT:
1359			if (*cp == '*') {
1360				incomment = C_COMMENT;
1361				cp += 1;
1362			} else if (*cp == '/') {
1363				incomment = CXX_COMMENT;
1364				cp += 1;
1365			} else {
1366				incomment = NO_COMMENT;
1367				linestate = LS_DIRTY;
1368			}
1369			continue;
1370		case FINISHING_COMMENT:
1371			if (*cp == '/') {
1372				incomment = NO_COMMENT;
1373				cp += 1;
1374			} else
1375				incomment = C_COMMENT;
1376			continue;
1377		default:
1378			abort(); /* bug */
1379		}
1380	return (cp);
1381}
1382
/*
 * Skip macro arguments: an optional parenthesised, possibly nested,
 * argument list.  If the parentheses do not balance before the end of the
 * buffer the original position is returned.
 */
static const char *
skipargs(const char *cp)
{
	const char *const start = cp;
	int nesting = 0;

	cp = skipcomment(cp);
	if (*cp != '(')
		return (cp);
	do {
		switch (*cp) {
		case '(':
			nesting++;
			break;
		case ')':
			nesting--;
			break;
		default:
			break;
		}
		cp = skipcomment(cp + 1);
	} while (nesting != 0 && *cp != '\0');
	/* Rewind and re-detect the syntax error later. */
	return (nesting == 0 ? cp : start);
}
1407
/*
 * Skip over an identifier: advance to the first character for which
 * endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
1418
/*
 * Skip whitespace and take a copy of any following identifier.  Returns
 * NULL, leaving *cpp alone, if there is no identifier; otherwise *cpp is
 * moved past the identifier and a newly allocated copy is returned.
 */
static const char *
getsym(const char **cpp)
{
	const char *begin, *finish;

	begin = skipcomment(*cpp);
	finish = skipsym(begin);
	if (finish == begin)
		return NULL;
	*cpp = finish;
	return (xstrdup(begin, finish));
}
1434
/*
 * Check that s (a symbol) matches the start of t, and that the
 * character following the match in t is not a symbol character.
 * Returns a pointer to that following character on a match,
 * otherwise NULL.
 */
static const char *
matchsym(const char *s, const char *t)
{
	for (; *s != '\0' && *t != '\0'; ++s, ++t) {
		if (*s != *t)
			return (NULL);
	}
	/* t ran out first, or the symbol in t continues past the match */
	if (*s != '\0' || !endsym(*t))
		return (NULL);
	return (t);
}
1454
1455/*
1456 * Look for the symbol in the symbol table. If it is found, we return
1457 * the symbol table index, else we return -1.
1458 */
1459static struct macro *
1460findsym(const char **strp)
1461{
1462	const char *str;
1463	char *strkey;
1464	struct macro key, *res;
1465
1466	str = *strp;
1467	*strp = skipsym(str);
1468	if (symlist) {
1469		if (*strp == str)
1470			return (NULL);
1471		if (symdepth && firstsym)
1472			printf("%s%3d", zerosyms ? "" : "\n", depth);
1473		firstsym = zerosyms = false;
1474		printf("%s%.*s%s",
1475		       symdepth ? " " : "",
1476		       (int)(*strp-str), str,
1477		       symdepth ? "" : "\n");
1478		/* we don't care about the value of the symbol */
1479		return (NULL);
1480	}
1481
1482	/*
1483	 * 'str' just points into the current mid-parse input and is not
1484	 * nul-terminated.  We know the length of the symbol, *strp - str, but
1485	 * need to provide a nul-terminated lookup key for RB_FIND's comparison
1486	 * function.  Create one here.
1487	 */
1488	strkey = malloc(*strp - str + 1);
1489	memcpy(strkey, str, *strp - str);
1490	strkey[*strp - str] = 0;
1491
1492	key.name = strkey;
1493	res = RB_FIND(MACROMAP, &macro_tree, &key);
1494	if (res != NULL)
1495		debugsym("findsym", res);
1496
1497	free(strkey);
1498	return (res);
1499}
1500
1501/*
1502 * Resolve indirect symbol values to their final definitions.
1503 */
1504static void
1505indirectsym(void)
1506{
1507	const char *cp;
1508	int changed;
1509	struct macro *sym, *ind;
1510
1511	do {
1512		changed = 0;
1513		RB_FOREACH(sym, MACROMAP, &macro_tree) {
1514			if (sym->value == NULL)
1515				continue;
1516			cp = sym->value;
1517			ind = findsym(&cp);
1518			if (ind == NULL || ind == sym ||
1519			    *cp != '\0' ||
1520			    ind->value == NULL ||
1521			    ind->value == sym->value)
1522				continue;
1523			debugsym("indir...", sym);
1524			sym->value = ind->value;
1525			debugsym("...ectsym", sym);
1526			changed++;
1527		}
1528	} while (changed);
1529}
1530
/*
 * Add a symbol to the symbol table, specified with the format sym=val.
 * With definethis a bare "sym" gets the value "1"; without it the symbol
 * is recorded with no value.  Anything else is reported through usage().
 * The '=' in symval is overwritten to terminate the name.
 */
static void
addsym1(bool ignorethis, bool definethis, char *symval)
{
	const char *val = skipsym(symval);

	if (*val == '\0') {
		val = definethis ? "1" : NULL;
	} else if (definethis && *val == '=') {
		symval[val - symval] = '\0';
		val++;
	} else {
		usage();
	}
	addsym2(ignorethis, symval, val);
}
1551
1552/*
1553 * Add a symbol to the symbol table.
1554 */
1555static void
1556addsym2(bool ignorethis, const char *symname, const char *val)
1557{
1558	const char *cp = symname;
1559	struct macro *sym, *r;
1560
1561	sym = findsym(&cp);
1562	if (sym == NULL) {
1563		sym = calloc(1, sizeof(*sym));
1564		sym->ignore = ignorethis;
1565		sym->name = symname;
1566		sym->value = val;
1567		r = RB_INSERT(MACROMAP, &macro_tree, sym);
1568		assert(r == NULL);
1569	}
1570	debugsym("addsym", sym);
1571}
1572
1573static void
1574debugsym(const char *why, const struct macro *sym)
1575{
1576	debug("%s %s%c%s", why, sym->name,
1577	    sym->value ? '=' : ' ',
1578	    sym->value ? sym->value : "undef");
1579}
1580
1581/*
1582 * Add symbols to the symbol table from a file containing
1583 * #define and #undef preprocessor directives.
1584 */
1585static void
1586defundefile(const char *fn)
1587{
1588	filename = fn;
1589	input = fopen(fn, "rb");
1590	if (input == NULL)
1591		err(2, "can't open %s", fn);
1592	linenum = 0;
1593	while (defundef())
1594		;
1595	if (ferror(input))
1596		err(2, "can't read %s", filename);
1597	else
1598		fclose(input);
1599	if (incomment)
1600		error("EOF in comment");
1601}
1602
1603/*
1604 * Read and process one #define or #undef directive
1605 */
1606static bool
1607defundef(void)
1608{
1609	const char *cp, *kw, *sym, *val, *end;
1610
1611	cp = skiphash();
1612	if (cp == NULL)
1613		return (false);
1614	if (*cp == '\0')
1615		goto done;
1616	/* strip trailing whitespace, and do a fairly rough check to
1617	   avoid unsupported multi-line preprocessor directives */
1618	end = cp + strlen(cp);
1619	while (end > tline && strchr(" \t\n\r", end[-1]) != NULL)
1620		--end;
1621	if (end > tline && end[-1] == '\\')
1622		Eioccc();
1623
1624	kw = cp;
1625	if ((cp = matchsym("define", kw)) != NULL) {
1626		sym = getsym(&cp);
1627		if (sym == NULL)
1628			error("Missing macro name in #define");
1629		if (*cp == '(') {
1630			val = "1";
1631		} else {
1632			cp = skipcomment(cp);
1633			val = (cp < end) ? xstrdup(cp, end) : "";
1634		}
1635		debug("#define");
1636		addsym2(false, sym, val);
1637	} else if ((cp = matchsym("undef", kw)) != NULL) {
1638		sym = getsym(&cp);
1639		if (sym == NULL)
1640			error("Missing macro name in #undef");
1641		cp = skipcomment(cp);
1642		debug("#undef");
1643		addsym2(false, sym, NULL);
1644	} else {
1645		error("Unrecognized preprocessor directive");
1646	}
1647	skipline(cp);
1648done:
1649	debug("parser line %d state %s comment %s line", linenum,
1650	    comment_name[incomment], linestate_name[linestate]);
1651	return (true);
1652}
1653
/*
 * Concatenate two strings into new memory, checking for failure.
 * The caller owns the returned buffer; formatting or allocation failure
 * is fatal.
 */
static char *
astrcat(const char *s1, const char *s2)
{
	int need;
	size_t bufsz;
	char *joined;

	/* first pass only measures the result */
	if ((need = snprintf(NULL, 0, "%s%s", s1, s2)) < 0)
		err(2, "snprintf");
	bufsz = (size_t)need + 1;
	if ((joined = (char *)malloc(bufsz)) == NULL)
		err(2, "malloc");
	snprintf(joined, bufsz, "%s%s", s1, s2);
	return (joined);
}
1674
/*
 * Duplicate the segment [start, end) of a string into new memory,
 * checking for failure.  Aborts if end precedes start; allocation failure
 * is fatal.
 */
static const char *
xstrdup(const char *start, const char *end)
{
	size_t len;
	char *copy;

	if (end < start)
		abort(); /* bug */
	len = (size_t)(end - start);
	copy = malloc(len + 1);
	if (copy == NULL)
		err(2, "malloc");
	memcpy(copy, start, len);
	copy[len] = '\0';
	return (copy);
}
1692
1693/*
1694 * Diagnostics.
1695 */
1696static void
1697debug(const char *msg, ...)
1698{
1699	va_list ap;
1700
1701	if (debugging) {
1702		va_start(ap, msg);
1703		vwarnx(msg, ap);
1704		va_end(ap);
1705	}
1706}
1707
1708static void
1709error(const char *msg)
1710{
1711	if (depth == 0)
1712		warnx("%s: %d: %s", filename, linenum, msg);
1713	else
1714		warnx("%s: %d: %s (#if line %d depth %d)",
1715		    filename, linenum, msg, stifline[depth], depth);
1716	closeio();
1717	errx(2, "Output may be truncated");
1718}
1719