1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1986-2009 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                                                                      *
19***********************************************************************/
20#pragma prototyped
21/*
22 * Glenn Fowler
23 * AT&T Research
24 *
25 * preprocessor and proto lexical analyzer fsm
26 * define PROTOMAIN for standalone proto
27 */
28
29#include "pplib.h"
30#include "ppfsm.h"
31
32/*
33 * lexical FSM encoding
34 * derived from a standalone ansi cpp by Dennis Ritchie
35 * modified for libpp by Glenn Fowler
36 *
37 *   fsm[] is initialized from fsminit[].  The encoding is blown out into
38 *   fsm[] for time efficiency.  When in state state, and one of the
39 *   characters in ch arrives, enter nextstate.  States >= TERMINAL are
40 *   either final, or at least require special action.  In fsminit[] there
41 *   is a line for each <state,charset,nextstate>.  Early entries are
42 *   overwritten by later ones.  C_XXX is the universal set and should
43 *   always be first.  Some of the fsminit[] entries are templates for
44 *   groups of states.  The OP entries trigger the state copies.  States
45 *   above TERMINAL are represented in fsm[] as negative values.  S_TOK and
46 *   S_TOKB encode the resulting token type in the upper bits.  These actions
47 *   differ in that S_TOKB has a lookahead char.
48 *
49 *   fsm[] has three start states:
50 *
51 *	PROTO	proto (ANSI -> K&R,C++,ANSI)
52 *	QUICK	standalone ppcpp()
53 *	TOKEN	tokenizing pplex()
54 *
55 *   If the next state remains the same then the fsm[] transition value is 0.
56 *   MAX+1 is a power of 2 so that fsm[state][EOF==MAX+1] actually accesses
57 *   fsm[state+1][0] which is ~S_EOB for all states.  This preserves the
58 *   power of 2 fsm[] row size for efficient array indexing.  Thanks to
59 *   D. G. Korn for the last two observations.  The pseudo non-terminal state
60 *   fsm[TERMINAL][state+1] is used to differentiate EOB from EOF.
61 *
62 *   The bit layout is:
63 *
64 *	TERM	arg	SPLICE	next
65 *	15	14-8	7	6-0
66 */
67
/*
 * character class codes that may appear in fsminit[].ch[]
 *
 * NOTE: each code must be a `control' character in every native codeset
 *       currently ok for {ascii,ebcdic1,ebcdic2,ebcdic3}
 */

#define C_DEC		001	/* the chars in dec[]		*/
#define C_EOF		002	/* end of file			*/
#define C_HEX		003	/* the chars in hex[]		*/
#define C_LET		021	/* the chars in let[]		*/
#define C_OCT		022	/* the chars in oct[]		*/
#define C_XXX		023	/* the universal set		*/

/*
 * a row with state OP is a table directive
 * nextstate then holds the directive code
 */

#define OP		(-1)
#define END		0	/* end of fsminit[]		*/
#define COPY		1	/* replicate a template state	*/

/*
 * copy the transitions of state f onto state t
 * MAX+1 shorts are moved starting at column 1, so the last one
 * moved lands in column 0 of the row after t
 * the fsm[TERMINAL][state+1] slot is carried along too
 */

#define copy(t,f)	\
	( \
		memcpy(&fsm[(t)][1], &fsm[(f)][1], (MAX+1)*sizeof(short)), \
		fsm[TERMINAL][(t)+1] = fsm[TERMINAL][(f)+1] \
	)
85
/*
 * one row of the fsminit[] table
 *
 * a normal row is a <state,charset,nextstate> triple
 * a row with state==OP is a directive: nextstate is the directive
 * code and ch[] carries its operands
 */

struct fsminit
{
	int		state;		/* state the row applies to, or OP	*/
	unsigned char	ch[4];		/* chars or C_* codes, 0 ends the list	*/
	int		nextstate;	/* state entered on any char in ch[]	*/
};
92
93static struct fsminit	fsminit[] =
94{
95	/* proto start state */
96	{	PROTO,	{ C_XXX },		S_CHR,			},
97	{	PROTO,	{ C_EOF },		S_EOF,			},
98	{	PROTO,	{ C_DEC },		BAD1,			},
99	{	PROTO,	{ '.' },		DOT,			},
100	{	PROTO,	{ C_LET },		NID,			},
101	{	PROTO,	{ 'L' },		LIT,			},
102	{	PROTO,	{ 'd', 'e', 'f', 'i' },	RES1,			},
103	{	PROTO,	{ 'r', 's', 't', 'v' },	RES1,			},
104	{	PROTO,	{ 'w', 'N' },		RES1,			},
105	{	PROTO,	{ '"', '\'' },		S_LITBEG,		},
106	{	PROTO,	{ '/' },		COM1,			},
107	{	PROTO,	{ '\n' },		S_NL,			},
108	{	PROTO,	{ ' ','\t','\f','\v' },	WS1,			},
109
110/* proto {do,else,extern,for,if,inline,return,static,typedef,va_start,void,while,NoN} */
111	{	RES1,	{ C_XXX },		S_MACRO,		},
112	{	RES1,	{ C_LET, C_DEC },	NID,			},
113	{	RES1,	{ 'a' },		RES1a,			},
114	{	RES1,	{ 'e' },		RES1e,			},
115	{	RES1,	{ 'f' },		RES1f,			},
116	{	RES1,	{ 'h' },		RES1h,			},
117	{	RES1,	{ 'l' },		RES1l,			},
118	{	RES1,	{ 'n' },		RES1n,			},
119	{	RES1,	{ 'o' },		RES1o,			},
120	{	RES1,	{ 't' },		RES1t,			},
121	{	RES1,	{ 'x' },		RES1x,			},
122	{	RES1,	{ 'y' },		RES1y,			},
123
124	/* proto reserved {va_start} */
125	{	RES1a,	{ C_XXX },		S_RESERVED,		},
126	{	RES1a,	{ C_LET, C_DEC },	NID,			},
127	{	RES1a,	{ '_','s','t','a' },	RES1a,			},
128	{	RES1a,	{ 'r' },		RES1a,			},
129
130	/* proto reserved {return} */
131	{	RES1e,	{ C_XXX },		S_RESERVED,		},
132	{	RES1e,	{ C_LET, C_DEC },	NID,			},
133	{	RES1e,	{ 't','u','r','n' },	RES1e,			},
134
135	/* proto reserved {if} */
136	{	RES1f,	{ C_XXX },		S_RESERVED,		},
137	{	RES1f,	{ C_LET, C_DEC },	NID,			},
138
139	/* proto reserved {while} */
140	{	RES1h,	{ C_XXX },		S_RESERVED,		},
141	{	RES1h,	{ C_LET, C_DEC },	NID,			},
142	{	RES1h,	{ 'i','l','e' },	RES1h,			},
143
144	/* proto reserved {else} */
145	{	RES1l,	{ C_XXX },		S_RESERVED,		},
146	{	RES1l,	{ C_LET, C_DEC },	NID,			},
147	{	RES1l,	{ 's','e' },		RES1l,			},
148
149	/* proto reserved {inline} */
150	{	RES1n,	{ C_XXX },		S_RESERVED,		},
151	{	RES1n,	{ C_LET, C_DEC },	NID,			},
152	{	RES1n,	{ 'l','i','n','e' },	RES1n,			},
153
154	/* proto reserved {do,for,void} */
155	{	RES1o,	{ C_XXX },		S_RESERVED,		},
156	{	RES1o,	{ C_LET, C_DEC },	NID,			},
157	{	RES1o,	{ 'r','i','d','N' },	RES1o,			},
158
159	/* proto reserved {static} */
160	{	RES1t,	{ C_XXX },		S_RESERVED,		},
161	{	RES1t,	{ C_LET, C_DEC },	NID,			},
162	{	RES1t,	{ 'a','t','i','c' },	RES1t,			},
163
164	/* proto reserved {extern} */
165	{	RES1x,	{ C_XXX },		S_RESERVED,		},
166	{	RES1x,	{ C_LET, C_DEC },	NID,			},
167	{	RES1x,	{ 't','e','r','n' },	RES1x,			},
168
169	/* proto reserved {typedef} */
170	{	RES1y,	{ C_XXX },		S_RESERVED,		},
171	{	RES1y,	{ C_LET, C_DEC },	NID,			},
172	{	RES1y,	{ 'p','e','d','f' },	RES1y,			},
173
174	/* saw /, perhaps start of comment */
175	{	COM1,	{ C_XXX },		S_CHRB,			},
176	{	COM1,	{ '*' },		COM2,			},
177#if PROTOMAIN
178	{	COM1,	{ '/' },		COM5,			},
179#endif
180
181	/* saw / *, start of comment */
182	{	COM2,	{ C_XXX },		COM2,			},
183	{	COM2,	{ '\n', C_EOF },	S_COMMENT,		},
184	{	COM2,	{ '/' },		COM4,			},
185	{	COM2,	{ '*' },		COM3,			},
186	{	COM2,	{ '#', ';', ')' },	QUAL(COM2),		},
187
188	/* saw the * possibly ending a comment */
189	{	COM3,	{ C_XXX },		COM2,			},
190	{	COM3,	{ '\n', C_EOF },	S_COMMENT,		},
191	{	COM3,	{ '#', ';', ')' },	QUAL(COM2),		},
192	{	COM3,	{ '*' },		COM3,			},
193	{	COM3,	{ '/' },		S_COMMENT,		},
194
195	/* saw / in / * comment, possible malformed nest */
196	{	COM4,	{ C_XXX },		COM2,			},
197	{	COM4,	{ '*', '\n', C_EOF },	S_COMMENT,		},
198	{	COM4,	{ '/' },		COM4,			},
199
200	/* saw / /, start of comment */
201	{	COM5,	{ C_XXX },		COM5,			},
202	{	COM5,	{ '\n', C_EOF },	S_COMMENT,		},
203	{	COM5,	{ '/' },		COM6,			},
204	{	COM5,	{ '*' },		COM7,			},
205
206	/* saw / in / / comment, possible malformed nest */
207	{	COM6,	{ C_XXX },		COM5,			},
208	{	COM6,	{ '*', '\n', C_EOF },	S_COMMENT,		},
209	{	COM6,	{ '/' },		COM6,			},
210
211	/* saw * in / /, possible malformed nest */
212	{	COM7,	{ C_XXX },		COM5,			},
213	{	COM7,	{ '\n', C_EOF },	S_COMMENT,		},
214	{	COM7,	{ '*' },		COM7,			},
215	{	COM7,	{ '/' },		S_COMMENT,		},
216
217	/* normal identifier -- always a macro candidate */
218	{	NID,	{ C_XXX },		S_MACRO,		},
219	{	NID,	{ C_LET, C_DEC },	NID,			},
220
221	/* saw ., operator or dbl constant */
222	{	DOT,	{ C_XXX },		S_CHRB,			},
223	{	DOT,	{ '.' },		DOT2,			},
224	{	DOT,	{ C_DEC },		BAD1,			},
225
226	/* saw .., possible ... */
227	{	DOT2,	{ C_XXX },		BACK(T_INVALID),	},
228	{	DOT2,	{ '.' },		KEEP(T_VARIADIC),	},
229
230	/* saw L (possible start of normal wide literal) */
231	{	LIT,	{ C_XXX },		S_MACRO,		},
232	{	LIT,	{ C_LET, C_DEC },	NID,			},
233	{	LIT,	{ '"', '\'' },		QUAL(LIT1),		},
234
235	/* saw " or ' beginning literal */
236	{	LIT1,	{ C_XXX },		LIT1,			},
237	{	LIT1,	{ '"', '\'' },		S_LITEND,		},
238	{	LIT1,	{ '\n', C_EOF },	S_LITEND,		},
239	{	LIT1,	{ '\\' },		LIT2,			},
240
241	/* saw \ in literal */
242	{	LIT2,	{ C_XXX },		S_LITESC,		},
243	{	LIT2,	{ '\n', C_EOF },	S_LITEND,		},
244
245	/* eat malformed numeric constant */
246	{	BAD1,	{ C_XXX },		BACK(T_INVALID),	},
247	{	BAD1,	{ C_LET, C_DEC, '.' },	BAD1,			},
248	{	BAD1,	{ 'e', 'E' },		BAD2,			},
249
250	/* eat malformed numeric fraction|exponent */
251	{	BAD2,	{ C_XXX },		BACK(T_INVALID),	},
252	{	BAD2,	{ C_LET, C_DEC, '.' },	BAD1,			},
253	{	BAD2,	{ '+', '-' },		BAD1,			},
254
255	/* saw white space, eat it up */
256	{	WS1,	{ C_XXX },		S_WS,			},
257	{	WS1,	{ ' ', '\t' },		WS1,			},
258	{	WS1,	{ '\f', '\v' },		S_VS,			},
259
260#if !PROTOMAIN
261
262	/* quick template */
263	{	QUICK,	{ C_XXX },		QTOK,			},
264	{	QUICK,	{ C_EOF, MARK },	S_CHRB,			},
265	{	QUICK,	{ C_LET, C_DEC },	QID,			},
266	{	QUICK,	{ 'L' },		LIT0,			},
267	{	QUICK,	{ '"', '\'' },		S_LITBEG,		},
268	{	QUICK,	{ '/' },		S_CHRB,			},
269	{	QUICK,	{ '*' },		QCOM,			},
270	{	QUICK,	{ '#' },		SHARP1,			},
271	{	QUICK,	{ '\n' },		S_NL,			},
272	{	QUICK,	{ '\f', '\v' },		S_VS,			},
273
274	/* copy QUICK to QUICK+1 through MAC0+1 */
275	{	OP,	{QUICK,QUICK+1,MAC0+1},	COPY,			},
276
277	/* quick start state */
278	{	QUICK,	{ C_EOF },		S_EOF,			},
279	{	QUICK,	{ C_DEC },		QNUM,			},
280	{	QUICK,	{ MARK },		QTOK,			},
281	{	QUICK,	{ '/' },		COM1,			},
282	{	QUICK,	{ ' ', '\t' },		QUICK,			},
283
284	/* grab non-macro tokens */
285	{	QTOK,	{ C_DEC },		QNUM,			},
286
287	/* grab numeric and invalid tokens */
288	{	QNUM,	{ C_LET, C_DEC, '.' },	QNUM,			},
289	{	QNUM,	{ 'e', 'E' },		QEXP,			},
290
291	/* grab exponent token */
292	{	QEXP,	{ C_LET, C_DEC, '.' },	QNUM,			},
293	{	QEXP,	{ '+', '-' },		QNUM,			},
294
295	/* saw *, grab possible bad comment terminator */
296	{	QCOM,	{ C_DEC },		QNUM,			},
297	{	QCOM,	{ '/' },		S_COMMENT,		},
298
299	/* saw L (possible start of wide string or first macro char) */
300	{	MAC0,	{ 'L' },		QID,			},
301	{	MAC0,	{ '"', '\'' },		QUAL(LIT1),		},
302
303	/* macro candidate template */
304	{	MAC0+1,	{ 'L' },		QID,			},
305
306	/* copy MAC0+1 to MAC0+2 through MACN */
307	{	OP,	{MAC0+1,MAC0+2,MACN},	COPY			},
308
309	/* saw L (possible start of wide string or macro L) */
310	{	HIT0,	{ C_XXX },		S_MACRO,		},
311	{	HIT0,	{ C_LET, C_DEC },	QID,			},
312	{	HIT0,	{ '"', '\'' },		QUAL(LIT1),		},
313
314	/* macro hit template */
315	{	HIT0+1,	{ C_XXX },		S_MACRO,		},
316	{	HIT0+1,	{ C_LET, C_DEC },	QID,			},
317
318	/* copy HIT0+1 to HIT0+2 through HITN */
319	{	OP,	{HIT0+1,HIT0+2,HITN},	COPY			},
320
321	/* saw L (possible start of wide literal) */
322	{	LIT0,	{ C_XXX },		S_MACRO,		},
323	{	LIT0,	{ C_LET, C_DEC },	QID,			},
324	{	LIT0,	{ '"', '\'' },		QUAL(LIT1),		},
325
326	/* (!PROTOMAIN COM1) saw /, perhaps start of comment or /= */
327	{	COM1,	{ '=' },		KEEP(T_DIVEQ),		},
328
329	/* normal start state */
330	{	TOKEN,	{ C_XXX },		S_HUH,			},
331	{	TOKEN,	{ C_EOF },		S_EOF,			},
332	{	TOKEN,	{ C_DEC },		DEC1,			},
333	{	TOKEN,	{ '0' },		OCT1,			},
334	{	TOKEN,	{ '.' },		DOT1,			},
335	{	TOKEN,	{ C_LET },		NID,			},
336	{	TOKEN,	{ 'L' },		LIT,			},
337	{	TOKEN,	{ '"', '\'', '<' },	S_LITBEG,		},
338	{	TOKEN,	{ '/' },		COM1,			},
339	{	TOKEN,	{ '\n' },		S_NL,			},
340	{	TOKEN,	{ ' ', '\t' },		WS1,			},
341	{	TOKEN,	{ '\f', '\v' },		S_VS,			},
342	{	TOKEN,	{ '#' },		SHARP1,			},
343	{	TOKEN,	{ ':' },		COLON1,			},
344	{	TOKEN,	{ '%' },		PCT1,			},
345	{	TOKEN,	{ '&' },		AND1,			},
346	{	TOKEN,	{ '*' },		STAR1,			},
347	{	TOKEN,	{ '+' },		PLUS1,			},
348	{	TOKEN,	{ '-' },		MINUS1,			},
349	{	TOKEN,	{ '=' },		EQ1,			},
350	{	TOKEN,	{ '!' },		NOT1,			},
351	{	TOKEN,	{ '>' },		GT1,			},
352	{	TOKEN,	{ '^' },		CIRC1,			},
353	{	TOKEN,	{ '|' },		OR1,			},
354	{	TOKEN,	{ '(', ')', '[', ']' },	S_CHR,			},
355	{	TOKEN,	{ '{', '}', ',', ';' },	S_CHR,			},
356	{	TOKEN,	{ '~', '?' },		S_CHR,			},
357
358	/* saw 0, possible oct|hex|dec|dbl constant */
359	{	OCT1,	{ C_XXX },		BACK(T_DECIMAL),	},
360	{	OCT1,	{ C_LET, C_DEC },	BAD1,			},
361	{	OCT1,	{ C_OCT },		OCT2,			},
362	{	OCT1,	{ 'e', 'E' },		DBL2,			},
363	{	OCT1,	{ 'l', 'L', 'u', 'U' },	QUAL(DEC2),		},
364	{	OCT1,	{ 'x', 'X' },		HEX1,			},
365	{	OCT1,	{ '.' },		DBL1,			},
366
367	/* saw 0<oct>, oct constant */
368	{	OCT2,	{ C_XXX },		BACK(T_OCTAL),		},
369	{	OCT2,	{ C_LET, C_DEC },	BAD1,			},
370	{	OCT2,	{ C_OCT },		OCT2,			},
371	{	OCT2,	{ 'e', 'E' },		DBL2,			},
372	{	OCT2,	{ 'l', 'L', 'u', 'U' },	QUAL(OCT3),		},
373	{	OCT2,	{ '.' },		DBL1,			},
374
375	/* oct constant qualifier */
376	{	OCT3,	{ C_XXX },		BACK(T_OCTAL),		},
377	{	OCT3,	{ C_LET, C_DEC, '.' },	BAD1,			},
378	{	OCT3,	{ 'l', 'L', 'u', 'U' },	QUAL(OCT3),		},
379
380	/* saw 0 [xX], hex constant */
381	{	HEX1,	{ C_XXX },		BACK(T_HEXADECIMAL),	},
382	{	HEX1,	{ C_LET },		BAD1,			},
383	{	HEX1,	{ C_HEX },		HEX1,			},
384	{	HEX1,	{ 'e', 'E' },		HEX3,			},
385	{	HEX1,	{ 'l', 'L', 'u', 'U' },	QUAL(HEX2),		},
386	{	HEX1,	{ '.' },		HEX4,			},
387	{	HEX1,	{ 'p', 'P' },		HEX5,			},
388
389	/* hex constant qualifier */
390	{	HEX2,	{ C_XXX },		BACK(T_HEXADECIMAL),	},
391	{	HEX2,	{ C_LET, C_DEC, '.' },	BAD1,			},
392	{	HEX2,	{ 'l', 'L', 'u', 'U' },	QUAL(HEX2),		},
393
394	/* hex [eE][-+] botch */
395	{	HEX3,	{ C_XXX },		BACK(T_HEXADECIMAL),	},
396	{	HEX3,	{ C_LET, '.', '-', '+'},BAD1,			},
397	{	HEX3,	{ C_HEX },		HEX1,			},
398	{	HEX3,	{ 'e', 'E' },		HEX3,			},
399	{	HEX3,	{ 'l', 'L', 'u', 'U' },	QUAL(HEX2),		},
400
401	/* hex dbl fraction */
402	{	HEX4,	{ C_XXX },		BACK(T_HEXDOUBLE),	},
403	{	HEX4,	{ C_LET, '.' },		BAD1,			},
404	{	HEX4,	{ C_HEX },		HEX4,			},
405	{	HEX4,	{ 'p', 'P' },		HEX5,			},
406	{	HEX4,	{ 'f', 'F', 'l', 'L' },	QUAL(HEX8),		},
407
408	/* optional hex dbl exponent sign */
409	{	HEX5,	{ C_XXX },		BACK(T_INVALID),	},
410	{	HEX5,	{ C_LET, '.' },		BAD1,			},
411	{	HEX5,	{ '+', '-' },		HEX6,			},
412	{	HEX5,	{ C_DEC },		HEX7,			},
413
414	/* mandatory hex dbl exponent first digit */
415	{	HEX6,	{ C_XXX },		BACK(T_INVALID),	},
416	{	HEX6,	{ C_LET, '.' },		BAD1,			},
417	{	HEX6,	{ C_DEC },		HEX7,			},
418
419	/* hex dbl exponent digits */
420	{	HEX7,	{ C_XXX },		BACK(T_HEXDOUBLE),	},
421	{	HEX7,	{ C_LET, '.' },		BAD1,			},
422	{	HEX7,	{ C_DEC },		HEX7,			},
423	{	HEX7,	{ 'f', 'F', 'l', 'L' },	QUAL(HEX8),		},
424
425	/* hex dbl constant qualifier */
426	{	HEX8,	{ C_XXX },		BACK(T_HEXDOUBLE),	},
427	{	HEX8,	{ C_LET, '.' },		BAD1,			},
428	{	HEX8,	{ 'f', 'F', 'l', 'L' },	QUAL(HEX8),		},
429
430	/* saw <dec>, dec constant */
431	{	DEC1,	{ C_XXX },		BACK(T_DECIMAL),	},
432	{	DEC1,	{ C_LET },		BAD1,			},
433	{	DEC1,	{ C_DEC },		DEC1,			},
434	{	DEC1,	{ 'e', 'E' },		DBL2,			},
435	{	DEC1,	{ 'l', 'L', 'u', 'U' },	QUAL(DEC2),		},
436	{	DEC1,	{ '.' },		DBL1,			},
437
438	/* dec constant qualifier */
439	{	DEC2,	{ C_XXX },		BACK(T_DECIMAL),	},
440	{	DEC2,	{ C_LET, C_DEC },	BAD1,			},
441	{	DEC2,	{ 'l', 'L', 'u', 'U' },	QUAL(DEC2),		},
442
443	/* saw ., operator or dbl constant */
444	{	DOT1,	{ C_XXX },		S_CHRB,			},
445	{	DOT1,	{ '.' },		DOT2,			},
446	{	DOT1,	{ C_DEC },		DBL1,			},
447
448	/* dbl fraction */
449	{	DBL1,	{ C_XXX },		BACK(T_DOUBLE),		},
450	{	DBL1,	{ C_LET, '.' },		BAD1,			},
451	{	DBL1,	{ C_DEC },		DBL1,			},
452	{	DBL1,	{ 'e', 'E' },		DBL2,			},
453	{	DBL1,	{ 'f', 'F', 'l', 'L' },	QUAL(DBL5),		},
454
455	/* optional dbl exponent sign */
456	{	DBL2,	{ C_XXX },		BACK(T_INVALID),	},
457	{	DBL2,	{ C_LET, '.' },		BAD1,			},
458	{	DBL2,	{ '+', '-' },		DBL3,			},
459	{	DBL2,	{ C_DEC },		DBL4,			},
460
461	/* mandatory dbl exponent first digit */
462	{	DBL3,	{ C_XXX },		BACK(T_INVALID),	},
463	{	DBL3,	{ C_LET, '.' },		BAD1,			},
464	{	DBL3,	{ C_DEC },		DBL4,			},
465
466	/* dbl exponent digits */
467	{	DBL4,	{ C_XXX },		BACK(T_DOUBLE),		},
468	{	DBL4,	{ C_LET, '.' },		BAD1,			},
469	{	DBL4,	{ C_DEC },		DBL4,			},
470	{	DBL4,	{ 'f', 'F', 'l', 'L' },	QUAL(DBL5),		},
471
472	/* dbl constant qualifier */
473	{	DBL5,	{ C_XXX },		BACK(T_DOUBLE),		},
474	{	DBL5,	{ C_LET, '.' },		BAD1,			},
475	{	DBL5,	{ 'f', 'F', 'l', 'L' },	QUAL(DBL5),		},
476
477	/* saw < starting include header */
478	{	HDR1,	{ C_XXX },		HDR1,			},
479	{	HDR1,	{ '>', '\n', C_EOF },	S_LITEND,		},
480
481	/* saw <binop><space> expecting = */
482	{	BIN1,	{ C_XXX },		S_HUH,			},
483	{	BIN1,	{ ' ', '\t' },		BIN1,			},
484
485	/* 2-char ops */
486
487	{	SHARP1,	{ C_XXX },		S_SHARP,		},
488
489	{	PCT1,	{ C_XXX },		S_CHRB,			},
490	{	PCT1,	{ '=' },		KEEP(T_MODEQ),		},
491
492	{	AND1,	{ C_XXX },		S_CHRB,			},
493	{	AND1,	{ '=' },		KEEP(T_ANDEQ),		},
494	{	AND1,	{ '&' },		KEEP(T_ANDAND),		},
495
496	{	STAR1,	{ C_XXX },		S_CHRB,			},
497	{	STAR1,	{ '=' },		KEEP(T_MPYEQ),		},
498	{	STAR1,	{ '/' },		S_COMMENT,		},
499
500	{	PLUS1,	{ C_XXX },		S_CHRB,			},
501	{	PLUS1,	{ '=' },		KEEP(T_ADDEQ),		},
502	{	PLUS1,	{ '+' },		KEEP(T_ADDADD),		},
503
504	{	MINUS1,	{ C_XXX },		S_CHRB,			},
505	{	MINUS1,	{ '=' },		KEEP(T_SUBEQ),		},
506	{	MINUS1,	{ '-' },		KEEP(T_SUBSUB),		},
507	{	MINUS1,	{ '>' },		KEEP(T_PTRMEM),		},
508
509	{	COLON1,	{ C_XXX },		S_CHRB,			},
510	{	COLON1,	{ '=', '>' },		S_HUH,			},
511
512	{	LT1,	{ C_XXX },		S_CHRB,			},
513	{	LT1,	{ '=' },		KEEP(T_LE),		},
514	{	LT1,	{ '<' },		LSH1,			},
515
516	{	EQ1,	{ C_XXX },		S_CHRB,			},
517	{	EQ1,	{ '=' },		KEEP(T_EQ),		},
518
519	{	NOT1,	{ C_XXX },		S_CHRB,			},
520	{	NOT1,	{ '=' },		KEEP(T_NE),		},
521
522	{	GT1,	{ C_XXX },		S_CHRB,			},
523	{	GT1,	{ '=' },		KEEP(T_GE),		},
524	{	GT1,	{ '>' },		RSH1,			},
525
526	{	CIRC1,	{ C_XXX },		S_CHRB,			},
527	{	CIRC1,	{ '=' },		KEEP(T_XOREQ),		},
528
529	{	OR1,	{ C_XXX },		S_CHRB,			},
530	{	OR1,	{ '=' },		KEEP(T_OREQ),		},
531	{	OR1,	{ '|' },		KEEP(T_OROR),		},
532
533	/* 3-char ops */
534
535	{	ARROW1,	{ C_XXX },		BACK(T_PTRMEM),		},
536	{	ARROW1,	{ '*' },		KEEP(T_PTRMEMREF),	},
537
538	{	LSH1,	{ C_XXX },		BACK(T_LSHIFT),		},
539	{	LSH1,	{ '=' },		KEEP(T_LSHIFTEQ),	},
540
541	{	RSH1,	{ C_XXX },		BACK(T_RSHIFT),		},
542	{	RSH1,	{ '=' },		KEEP(T_RSHIFTEQ),	},
543
544#endif
545
546	/* end */
547	{	OP,	{ 0 },			END,			}
548};
549
550short		fsm[TERMINAL+1][MAX+1];
551
552char		trigraph[MAX+1];
553
554#if PROTOMAIN
555static char	spl[] = { '\\', '\r', 0 };
556static char	aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_$@";
557#else
558static char	spl[] = { MARK, '?', '\\', '\r', CC_sub, 0 };
559static char	aln[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
560#endif
561static char*	let = &aln[10];
562static char	hex[] = "fedcbaFEDCBA9876543210";
563static char*	dec = &hex[12];
564static char*	oct = &hex[14];
565
566/*
567 * runtime FSM modifications
568 * ppfsm(FSM_INIT,0) must be called first
569 */
570
571void
572ppfsm(int op, register char* s)
573{
574	register int			c;
575	register int			n;
576	register int			i;
577	register short*			rp;
578	register struct fsminit*	fp;
579#if !PROTOMAIN
580	char*				t;
581	int				x;
582#endif
583
584	switch (op)
585	{
586
587#if !PROTOMAIN
588
589	case FSM_IDADD:
590		while (c = *s++)
591			if (!ppisid(c))
592			{
593				if (fsm[TOKEN][c] == ~S_HUH)
594				{
595					setid(c);
596					for (i = 0; i < TERMINAL; i++)
597						fsm[i][c] = IDSTATE(fsm[i]['_']);
598				}
599				else error(2, "%c: cannot add to identifier set", c);
600			}
601		break;
602
603	case FSM_IDDEL:
604		while (c = *s++)
605			if (ppisid(c))
606			{
607				clrid(c);
608				for (i = 0; i < TERMINAL; i++)
609					fsm[i][c] = ~S_HUH;
610			}
611		break;
612
613#endif
614
615	case FSM_INIT:
616		for (fp = fsminit;; fp++)
617		{
618			if ((n = fp->nextstate) >= TERMINAL) n = ~n;
619			if (fp->state == OP)
620			{
621#if !PROTOMAIN
622				switch (n)
623				{
624				case COPY:
625					c = fp->ch[0];
626					n = fp->ch[2];
627					for (i = fp->ch[1]; i <= n; i++)
628						copy(i, c);
629					continue;
630				default:
631					break;
632				}
633#endif
634				break;
635			}
636			rp = fsm[fp->state];
637			for (i = 0; i < sizeof(fp->ch) && (c = fp->ch[i]); i++)
638			{
639				switch (c)
640				{
641				case C_XXX:
642					for (c = 0; c <= MAX; c++)
643						rp[c] = n;
644					/*FALLTHROUGH*/
645
646				case C_EOF:
647					fsm[TERMINAL][fp->state+1] = n < 0 ? ~n : n;
648					continue;
649
650				case C_LET:
651					s = let;
652					break;
653
654				case C_HEX:
655					s = hex;
656					break;
657
658				case C_DEC:
659					s = dec;
660					break;
661
662				case C_OCT:
663					s = oct;
664					break;
665
666				default:
667					rp[c] = n;
668					continue;
669				}
670				while (c = *s++)
671					rp[c] = n;
672			}
673		}
674
675		/*
676		 * install splice special cases
677		 * and same non-terminal transitions
678		 */
679
680		for (i = 0; i < TERMINAL; i++)
681		{
682			rp = fsm[i];
683			s = spl;
684			while (c = *s++)
685				if (c != MARK || !INCOMMENT(rp))
686				{
687					if (rp[c] >= 0) rp[c] = ~rp[c];
688					rp[c] &= ~SPLICE;
689				}
690			rp[EOB] = ~S_EOB;
691			for (c = 0; c <= MAX; c++)
692				if (rp[c] == i)
693					rp[c] = 0;
694		}
695		fsm[TERMINAL][0] = ~S_EOB;
696
697#if !PROTOMAIN
698
699		/*
700		 * default character types
701		 */
702
703		s = let;
704		while (c = *s++)
705			setid(c);
706		s = dec;
707		while (c = *s++)
708			setdig(c);
709		s = spl;
710		do setsplice(c = *s++); while (c);
711
712		/*
713		 * trigraph map
714		 */
715
716		trigraph['='] = '#';
717		trigraph['('] = '[';
718		trigraph['/'] = '\\';
719		trigraph[')'] = ']';
720		trigraph['\''] = '^';
721		trigraph['<'] = '{';
722		trigraph['!'] = '|';
723		trigraph['>'] = '}';
724		trigraph['-'] = '~';
725#endif
726		break;
727
728#if !PROTOMAIN
729
730	case FSM_PLUSPLUS:
731		if (pp.option & PLUSPLUS)
732		{
733			fsm[COLON1][':'] = ~KEEP(T_SCOPE);
734			fsm[DOT1]['*'] = ~KEEP(T_DOTREF);
735			fsm[MINUS1]['>'] = ARROW1;
736			fsm[COM1]['/'] = COM5;
737			t = "%<:";
738			for (i = 0; i < TERMINAL; i++)
739			{
740				rp = fsm[i];
741				if (!INCOMMENT(rp) && !INQUOTE(rp))
742				{
743					s = t;
744					while (c = *s++)
745					{
746						if (rp[c] > 0) rp[c] = ~rp[c];
747						else if (!rp[c]) rp[c] = ~i;
748						rp[c] &= ~SPLICE;
749					}
750				}
751			}
752			s = t;
753			while (c = *s++) setsplice(c);
754		}
755		else
756		{
757			fsm[COLON1][':'] = ~S_CHRB;
758			fsm[DOT1]['*'] = ~S_CHRB;
759			fsm[MINUS1]['>'] = ~KEEP(T_PTRMEM);
760			fsm[COM1]['/'] = (pp.option & PLUSCOMMENT) ? COM5 : ~S_CHRB;
761		}
762		break;
763
764#if COMPATIBLE
765
766	case FSM_COMPATIBILITY:
767		if (pp.state & COMPATIBILITY)
768		{
769			fsm[HEX1]['e'] = HEX1;
770			fsm[HEX1]['E'] = HEX1;
771			fsm[QNUM]['e'] = QNUM;
772			fsm[QNUM]['E'] = QNUM;
773			fsm[QNUM]['u'] = ~QUAL(QNUM);
774			fsm[QNUM]['U'] = ~QUAL(QNUM);
775		}
776		else
777		{
778			fsm[HEX1]['e'] = HEX3;
779			fsm[HEX1]['E'] = HEX3;
780			fsm[QNUM]['e'] = QEXP;
781			fsm[QNUM]['E'] = QEXP;
782			fsm[QNUM]['u'] = QNUM;
783			fsm[QNUM]['U'] = QNUM;
784		}
785		break;
786
787#endif
788
789	case FSM_QUOTADD:
790		while (c = *s++)
791			if (fsm[TOKEN][c] == ~S_HUH)
792				for (i = 0; i < TERMINAL; i++)
793					fsm[i][c] = fsm[i]['"'];
794			else error(2, "%c: cannot add to quote set", c);
795		break;
796
797	case FSM_QUOTDEL:
798		while (c = *s++)
799			if (c != '"' && fsm[TOKEN][c] == fsm[TOKEN]['"'])
800				for (i = 0; i < TERMINAL; i++)
801					fsm[i][c] = fsm[i]['_'];
802		break;
803
804	case FSM_OPSPACE:
805		n = s ? BIN1 : ~S_CHRB;
806		fsm[COM1][' '] = fsm[COM1]['\t'] = n;
807		fsm[AND1][' '] = fsm[AND1]['\t'] = n;
808		fsm[STAR1][' '] = fsm[STAR1]['\t'] = n;
809		fsm[PCT1][' '] = fsm[PCT1]['\t'] = n;
810		fsm[PLUS1][' '] = fsm[PLUS1]['\t'] = n;
811		fsm[MINUS1][' '] = fsm[MINUS1]['\t'] = n;
812		fsm[CIRC1][' '] = fsm[CIRC1]['\t'] = n;
813		fsm[OR1][' '] = fsm[OR1]['\t'] = n;
814		fsm[LSH1][' '] = fsm[LSH1]['\t'] = s ? BIN1 : ~BACK(T_LSHIFT);
815		fsm[RSH1][' '] = fsm[RSH1]['\t'] = s ? BIN1 : ~BACK(T_RSHIFT);
816		break;
817
818	case FSM_MACRO:
819		if (pp.truncate && strlen(s) >= pp.truncate)
820		{
821			x = s[pp.truncate];
822			s[pp.truncate] = 0;
823		}
824		else x = -1;
825		i = MAC0 + ((c = *s++) != 'L');
826		if ((n = fsm[QUICK][c]) != (i + NMAC))
827		{
828			n = i;
829			if (!*s) n += NMAC;
830		}
831		if (fsm[QUICK][c] != n)
832			fsm[QUICK][c] = fsm[QCOM][c] = fsm[QTOK][c] = n;
833		if (c = *s++)
834		{
835			for (;;)
836			{
837				if ((i = n) < HIT0)
838				{
839					if (n < MACN) n++;
840					if (!*s)
841					{
842						n += NMAC;
843						break;
844					}
845					if (fsm[i][c] < HIT0)
846						fsm[i][c] = n;
847					if (fsm[i + NMAC][c] < HIT0)
848						fsm[i + NMAC][c] = n;
849				}
850				else
851				{
852					if (n < HITN) n++;
853					if (!*s) break;
854					if (fsm[i][c] < HIT0)
855					{
856						n -= NMAC;
857						fsm[i][c] = n;
858					}
859				}
860				c = *s++;
861			}
862			if (x >= 0)
863			{
864				*s = x;
865				for (n = CHAR_MIN; n <= CHAR_MAX; n++)
866					if (ppisidig(n))
867						fsm[HITN][n] = HITN;
868				n = HITN;
869			}
870			if (fsm[i][c] < n)
871				fsm[i][c] = n;
872			if (i < HIT0 && fsm[i + NMAC][c] < n)
873				fsm[i + NMAC][c] = n;
874		}
875		break;
876
877#endif
878
879	}
880}
881
882#if !PROTOMAIN
883
884/*
885 * file buffer refill
886 * c is current input char
887 */
888
889void
890refill(register int c)
891{
892	if (pp.in->flags & IN_eof)
893	{
894		pp.in->nextchr--;
895		c = 0;
896	}
897	else
898	{
899		*((pp.in->nextchr = pp.in->buffer + PPBAKSIZ) - 1) = c;
900		c =
901#if PROTOTYPE
902		(pp.in->flags & IN_prototype) ? pppread(pp.in->nextchr) :
903#endif
904		read(pp.in->fd, pp.in->nextchr, PPBUFSIZ);
905	}
906	if (c > 0)
907	{
908		if (pp.in->nextchr[c - 1] == '\n') pp.in->flags |= IN_newline;
909		else pp.in->flags &= ~IN_newline;
910#if PROTOTYPE
911		if (!(pp.in->flags & IN_prototype))
912#endif
913		if (c < PPBUFSIZ && (pp.in->flags & IN_regular))
914		{
915			pp.in->flags |= IN_eof;
916			close(pp.in->fd);
917			pp.in->fd = -1;
918		}
919	}
920	else
921	{
922		if (c < 0)
923		{
924			error(ERROR_SYSTEM|3, "read error");
925			c = 0;
926		}
927		else if ((pp.in->flags ^ pp.in->prev->flags) & IN_c)
928		{
929			static char	ket[] = { 0, '}', '\n', 0 };
930
931			pp.in->flags ^= IN_c;
932			pp.in->nextchr = ket + 1;
933			c = 2;
934		}
935		pp.in->flags |= IN_eof;
936	}
937#if CHECKPOINT
938	pp.in->buflen = c;
939#endif
940	pp.in->nextchr[c] = 0;
941	debug((-7, "refill(\"%s\") = %d = \"%-.*s%s\"", error_info.file, c, (c > 32 ? 32 : c), pp.in->nextchr, c > 32 ? "..." : ""));
942	if (pp.test & 0x0080)
943		sfprintf(sfstderr, "===== refill(\"%s\") = %d =====\n%s\n===== eob(\"%s\") =====\n", error_info.file, c, pp.in->nextchr, error_info.file);
944}
945
946#endif
947