Deleted Added
full compact
lexi.c (116390) lexi.c (125618)
1/*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#if 0
37#ifndef lint
38static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
39#endif /* not lint */
40#endif
41#include <sys/cdefs.h>
1/*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#if 0
37#ifndef lint
38static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
39#endif /* not lint */
40#endif
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 116390 2003-06-15 09:28:17Z charnier $");
42__FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 125618 2004-02-09 12:52:15Z bde $");
43
44/*
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
47 * of token scanned.
48 */
49
50#include <err.h>
51#include <stdio.h>
52#include <ctype.h>
53#include <stdlib.h>
54#include <string.h>
55#include "indent_globs.h"
56#include "indent_codes.h"
57#include "indent.h"
58
59#define alphanum 1
60#define opchar 3
61
/*
 * One entry of the reserved-word table: a keyword's spelling paired with the
 * class code that lexi() switches on when the scanned token matches it.
 */
62struct templ {
63 const char *rwd; /* keyword text; a null pointer marks the end of the table */
64 int rwcode; /* keyword class, used as the switch selector in lexi() */
65};
66
43
44/*
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
47 * of token scanned.
48 */
49
50#include <err.h>
51#include <stdio.h>
52#include <ctype.h>
53#include <stdlib.h>
54#include <string.h>
55#include "indent_globs.h"
56#include "indent_codes.h"
57#include "indent.h"
58
59#define alphanum 1
60#define opchar 3
61
/*
 * One entry of the reserved-word table: a keyword's spelling paired with the
 * class code that lexi() switches on when the scanned token matches it.
 */
62struct templ {
63 const char *rwd; /* keyword text; a null pointer marks the end of the table */
64 int rwcode; /* keyword class, used as the switch selector in lexi() */
65};
66
67struct templ specials[1000] =
67struct templ specials[100] =
68{
69 {"switch", 1},
70 {"case", 2},
71 {"break", 0},
72 {"struct", 3},
73 {"union", 3},
74 {"enum", 3},
75 {"default", 2},
76 {"int", 4},
77 {"char", 4},
78 {"float", 4},
79 {"double", 4},
80 {"long", 4},
81 {"short", 4},
82 {"typdef", 4},
83 {"unsigned", 4},
84 {"register", 4},
85 {"static", 4},
86 {"global", 4},
87 {"extern", 4},
88 {"void", 4},
89 {"goto", 0},
90 {"return", 0},
91 {"if", 5},
92 {"while", 5},
93 {"for", 5},
94 {"else", 6},
95 {"do", 6},
96 {"sizeof", 7},
68{
69 {"switch", 1},
70 {"case", 2},
71 {"break", 0},
72 {"struct", 3},
73 {"union", 3},
74 {"enum", 3},
75 {"default", 2},
76 {"int", 4},
77 {"char", 4},
78 {"float", 4},
79 {"double", 4},
80 {"long", 4},
81 {"short", 4},
82 {"typdef", 4},
83 {"unsigned", 4},
84 {"register", 4},
85 {"static", 4},
86 {"global", 4},
87 {"extern", 4},
88 {"void", 4},
89 {"goto", 0},
90 {"return", 0},
91 {"if", 5},
92 {"while", 5},
93 {"for", 5},
94 {"else", 6},
95 {"do", 6},
96 {"sizeof", 7},
97 {"const", 9},
98 {"volatile", 9},
99 {0, 0}
100};
101
/*
 * Character classification table, indexed by character code.  Each entry is
 * 0 (neither class), 1 (alphanum) or 3 (opchar); lexi() compares entries
 * against alphanum to decide whether to scan an identifier or number.
 * The per-row annotations below assume an ASCII character set.
 */
102char chartype[128] =
103{ /* this is used to facilitate the decision of
104 * what type (alphanumeric, operator) each
105 * character is */
106 0, 0, 0, 0, 0, 0, 0, 0, /* codes 0-7: control characters */
107 0, 0, 0, 0, 0, 0, 0, 0, /* codes 8-15: control characters */
108 0, 0, 0, 0, 0, 0, 0, 0, /* codes 16-23: control characters */
109 0, 0, 0, 0, 0, 0, 0, 0, /* codes 24-31: control characters */
110 0, 3, 0, 0, 1, 3, 3, 0, /* space ! " # $ % & ' -- note '$' counts as alphanum */
111 0, 0, 3, 3, 0, 3, 0, 3, /* ( ) * + , - . / */
112 1, 1, 1, 1, 1, 1, 1, 1, /* digits 0-7 */
113 1, 1, 0, 0, 3, 3, 3, 3, /* 8 9 : ; < = > ? */
114 0, 1, 1, 1, 1, 1, 1, 1, /* @ A-G */
115 1, 1, 1, 1, 1, 1, 1, 1, /* H-O */
116 1, 1, 1, 1, 1, 1, 1, 1, /* P-W */
117 1, 1, 1, 0, 0, 0, 3, 1, /* X Y Z [ backslash ] ^ _ */
118 0, 1, 1, 1, 1, 1, 1, 1, /* backquote a-g */
119 1, 1, 1, 1, 1, 1, 1, 1, /* h-o */
120 1, 1, 1, 1, 1, 1, 1, 1, /* p-w */
121 1, 1, 1, 0, 3, 0, 3, 0 /* x y z { | } ~ DEL */
122};
123
124int
125lexi(void)
126{
127 int unary_delim; /* this is set to 1 if the current token
128 * forces a following operator to be unary */
129 static int last_code; /* the last token type returned */
130 static int l_struct; /* set to 1 if the last token was 'struct' */
131 int code; /* internal code to be returned */
132 char qchar; /* the delimiter character for a string */
133
134 e_token = s_token; /* point to start of place to save token */
135 unary_delim = false;
136 ps.col_1 = ps.last_nl; /* tell world that this token started in
137 * column 1 iff the last thing scanned was nl */
138 ps.last_nl = false;
139
140 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
141 ps.col_1 = false; /* leading blanks imply token is not in column
142 * 1 */
143 if (++buf_ptr >= buf_end)
144 fill_buffer();
145 }
146
147 /* Scan an alphanumeric token */
148 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
149 /*
150 * we have a character or number
151 */
152 const char *j; /* used for searching thru list of
153 *
154 * reserved words */
155 struct templ *p;
156
157 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
158 int seendot = 0,
159 seenexp = 0,
160 seensfx = 0;
161 if (*buf_ptr == '0' &&
162 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
163 *e_token++ = *buf_ptr++;
164 *e_token++ = *buf_ptr++;
165 while (isxdigit(*buf_ptr)) {
166 CHECK_SIZE_TOKEN;
167 *e_token++ = *buf_ptr++;
168 }
169 }
170 else
171 while (1) {
172 if (*buf_ptr == '.') {
173 if (seendot)
174 break;
175 else
176 seendot++;
177 }
178 CHECK_SIZE_TOKEN;
179 *e_token++ = *buf_ptr++;
180 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
181 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
182 break;
183 else {
184 seenexp++;
185 seendot++;
186 CHECK_SIZE_TOKEN;
187 *e_token++ = *buf_ptr++;
188 if (*buf_ptr == '+' || *buf_ptr == '-')
189 *e_token++ = *buf_ptr++;
190 }
191 }
192 }
193 while (1) {
194 if (!(seensfx & 1) &&
195 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
196 CHECK_SIZE_TOKEN;
197 *e_token++ = *buf_ptr++;
198 seensfx |= 1;
199 continue;
200 }
201 if (!(seensfx & 2) &&
202 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
203 CHECK_SIZE_TOKEN;
204 if (buf_ptr[1] == buf_ptr[0])
205 *e_token++ = *buf_ptr++;
206 *e_token++ = *buf_ptr++;
207 seensfx |= 2;
208 continue;
209 }
210 break;
211 }
212 }
213 else
214 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
215 /* fill_buffer() terminates buffer with newline */
216 if (*buf_ptr == BACKSLASH) {
217 if (*(buf_ptr + 1) == '\n') {
218 buf_ptr += 2;
219 if (buf_ptr >= buf_end)
220 fill_buffer();
221 } else
222 break;
223 }
224 CHECK_SIZE_TOKEN;
225 /* copy it over */
226 *e_token++ = *buf_ptr++;
227 if (buf_ptr >= buf_end)
228 fill_buffer();
229 }
230 *e_token++ = '\0';
231 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
232 if (++buf_ptr >= buf_end)
233 fill_buffer();
234 }
235 ps.its_a_keyword = false;
236 ps.sizeof_keyword = false;
237 if (l_struct) { /* if last token was 'struct', then this token
238 * should be treated as a declaration */
239 l_struct = false;
240 last_code = ident;
241 ps.last_u_d = true;
242 return (decl);
243 }
244 ps.last_u_d = false; /* Operator after identifier is binary */
245 last_code = ident; /* Remember that this is the code we will
246 * return */
247
248 /*
249 * This loop will check if the token is a keyword.
250 */
251 for (p = specials; (j = p->rwd) != 0; p++) {
252 const char *q = s_token; /* point at scanned token */
253 if (*j++ != *q++ || *j++ != *q++)
254 continue; /* This test depends on the fact that
255 * identifiers are always at least 1 character
256 * long (ie. the first two bytes of the
257 * identifier are always meaningful) */
258 if (q[-1] == 0)
259 break; /* If its a one-character identifier */
260 while (*q++ == *j)
261 if (*j++ == 0)
262 goto found_keyword; /* I wish that C had a multi-level
263 * break... */
264 }
265 if (p->rwd) { /* we have a keyword */
266 found_keyword:
267 ps.its_a_keyword = true;
268 ps.last_u_d = true;
269 switch (p->rwcode) {
270 case 1: /* it is a switch */
271 return (swstmt);
272 case 2: /* a case or default */
273 return (casestmt);
274
275 case 3: /* a "struct" */
276 if (ps.p_l_follow)
97 {0, 0}
98};
99
/*
 * Character classification table, indexed by character code.  Each entry is
 * 0 (neither class), 1 (alphanum) or 3 (opchar); lexi() compares entries
 * against alphanum to decide whether to scan an identifier or number.
 * The per-row annotations below assume an ASCII character set.
 */
100char chartype[128] =
101{ /* this is used to facilitate the decision of
102 * what type (alphanumeric, operator) each
103 * character is */
104 0, 0, 0, 0, 0, 0, 0, 0, /* codes 0-7: control characters */
105 0, 0, 0, 0, 0, 0, 0, 0, /* codes 8-15: control characters */
106 0, 0, 0, 0, 0, 0, 0, 0, /* codes 16-23: control characters */
107 0, 0, 0, 0, 0, 0, 0, 0, /* codes 24-31: control characters */
108 0, 3, 0, 0, 1, 3, 3, 0, /* space ! " # $ % & ' -- note '$' counts as alphanum */
109 0, 0, 3, 3, 0, 3, 0, 3, /* ( ) * + , - . / */
110 1, 1, 1, 1, 1, 1, 1, 1, /* digits 0-7 */
111 1, 1, 0, 0, 3, 3, 3, 3, /* 8 9 : ; < = > ? */
112 0, 1, 1, 1, 1, 1, 1, 1, /* @ A-G */
113 1, 1, 1, 1, 1, 1, 1, 1, /* H-O */
114 1, 1, 1, 1, 1, 1, 1, 1, /* P-W */
115 1, 1, 1, 0, 0, 0, 3, 1, /* X Y Z [ backslash ] ^ _ */
116 0, 1, 1, 1, 1, 1, 1, 1, /* backquote a-g */
117 1, 1, 1, 1, 1, 1, 1, 1, /* h-o */
118 1, 1, 1, 1, 1, 1, 1, 1, /* p-w */
119 1, 1, 1, 0, 3, 0, 3, 0 /* x y z { | } ~ DEL */
120};
121
122int
123lexi(void)
124{
125 int unary_delim; /* this is set to 1 if the current token
126 * forces a following operator to be unary */
127 static int last_code; /* the last token type returned */
128 static int l_struct; /* set to 1 if the last token was 'struct' */
129 int code; /* internal code to be returned */
130 char qchar; /* the delimiter character for a string */
131
132 e_token = s_token; /* point to start of place to save token */
133 unary_delim = false;
134 ps.col_1 = ps.last_nl; /* tell world that this token started in
135 * column 1 iff the last thing scanned was nl */
136 ps.last_nl = false;
137
138 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
139 ps.col_1 = false; /* leading blanks imply token is not in column
140 * 1 */
141 if (++buf_ptr >= buf_end)
142 fill_buffer();
143 }
144
145 /* Scan an alphanumeric token */
146 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
147 /*
148 * we have a character or number
149 */
150 const char *j; /* used for searching thru list of
151 *
152 * reserved words */
153 struct templ *p;
154
155 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
156 int seendot = 0,
157 seenexp = 0,
158 seensfx = 0;
159 if (*buf_ptr == '0' &&
160 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
161 *e_token++ = *buf_ptr++;
162 *e_token++ = *buf_ptr++;
163 while (isxdigit(*buf_ptr)) {
164 CHECK_SIZE_TOKEN;
165 *e_token++ = *buf_ptr++;
166 }
167 }
168 else
169 while (1) {
170 if (*buf_ptr == '.') {
171 if (seendot)
172 break;
173 else
174 seendot++;
175 }
176 CHECK_SIZE_TOKEN;
177 *e_token++ = *buf_ptr++;
178 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
179 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
180 break;
181 else {
182 seenexp++;
183 seendot++;
184 CHECK_SIZE_TOKEN;
185 *e_token++ = *buf_ptr++;
186 if (*buf_ptr == '+' || *buf_ptr == '-')
187 *e_token++ = *buf_ptr++;
188 }
189 }
190 }
191 while (1) {
192 if (!(seensfx & 1) &&
193 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
194 CHECK_SIZE_TOKEN;
195 *e_token++ = *buf_ptr++;
196 seensfx |= 1;
197 continue;
198 }
199 if (!(seensfx & 2) &&
200 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
201 CHECK_SIZE_TOKEN;
202 if (buf_ptr[1] == buf_ptr[0])
203 *e_token++ = *buf_ptr++;
204 *e_token++ = *buf_ptr++;
205 seensfx |= 2;
206 continue;
207 }
208 break;
209 }
210 }
211 else
212 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
213 /* fill_buffer() terminates buffer with newline */
214 if (*buf_ptr == BACKSLASH) {
215 if (*(buf_ptr + 1) == '\n') {
216 buf_ptr += 2;
217 if (buf_ptr >= buf_end)
218 fill_buffer();
219 } else
220 break;
221 }
222 CHECK_SIZE_TOKEN;
223 /* copy it over */
224 *e_token++ = *buf_ptr++;
225 if (buf_ptr >= buf_end)
226 fill_buffer();
227 }
228 *e_token++ = '\0';
229 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
230 if (++buf_ptr >= buf_end)
231 fill_buffer();
232 }
233 ps.its_a_keyword = false;
234 ps.sizeof_keyword = false;
235 if (l_struct) { /* if last token was 'struct', then this token
236 * should be treated as a declaration */
237 l_struct = false;
238 last_code = ident;
239 ps.last_u_d = true;
240 return (decl);
241 }
242 ps.last_u_d = false; /* Operator after identifier is binary */
243 last_code = ident; /* Remember that this is the code we will
244 * return */
245
246 /*
247 * This loop will check if the token is a keyword.
248 */
249 for (p = specials; (j = p->rwd) != 0; p++) {
250 const char *q = s_token; /* point at scanned token */
251 if (*j++ != *q++ || *j++ != *q++)
252 continue; /* This test depends on the fact that
253 * identifiers are always at least 1 character
254 * long (ie. the first two bytes of the
255 * identifier are always meaningful) */
256 if (q[-1] == 0)
257 break; /* If its a one-character identifier */
258 while (*q++ == *j)
259 if (*j++ == 0)
260 goto found_keyword; /* I wish that C had a multi-level
261 * break... */
262 }
263 if (p->rwd) { /* we have a keyword */
264 found_keyword:
265 ps.its_a_keyword = true;
266 ps.last_u_d = true;
267 switch (p->rwcode) {
268 case 1: /* it is a switch */
269 return (swstmt);
270 case 2: /* a case or default */
271 return (casestmt);
272
273 case 3: /* a "struct" */
274 if (ps.p_l_follow)
277 break; /* inside parens: cast */
278 /*
279 * Next time around, we may want to know that we have had a
280 * 'struct'
281 */
275 break; /* inside parens: cast */
282 l_struct = true;
283
284 /*
276 l_struct = true;
277
278 /*
285 * Fall through to test for a cast, function prototype or
286 * sizeof().
279 * Next time around, we will want to know that we have had a
280 * 'struct'
287 */
288 case 4: /* one of the declaration keywords */
289 if (ps.p_l_follow) {
290 ps.cast_mask |= 1 << ps.p_l_follow;
281 */
282 case 4: /* one of the declaration keywords */
283 if (ps.p_l_follow) {
284 ps.cast_mask |= 1 << ps.p_l_follow;
291
292 /*
293 * Forget that we saw `struct' if we're in a sizeof().
294 */
295 if (ps.sizeof_mask)
296 l_struct = false;
297
298 break; /* inside parens: cast, prototype or sizeof() */
285 break; /* inside parens: cast */
299 }
300 last_code = decl;
301 return (decl);
302
303 case 5: /* if, while, for */
304 return (sp_paren);
305
306 case 6: /* do, else */
307 return (sp_nparen);
308
309 case 7:
310 ps.sizeof_keyword = true;
311 default: /* all others are treated like any other
312 * identifier */
313 return (ident);
314 } /* end of switch */
315 } /* end of if (found_it) */
316 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
317 char *tp = buf_ptr;
318 while (tp < buf_end)
319 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
320 goto not_proc;
321 strncpy(ps.procname, token, sizeof ps.procname - 1);
322 ps.in_parameter_declaration = 1;
323 rparen_count = 1;
324 not_proc:;
325 }
326 /*
327 * The following hack attempts to guess whether or not the current
328 * token is in fact a declaration keyword -- one that has been
329 * typedefd
330 */
331 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
332 && !ps.p_l_follow
333 && !ps.block_init
334 && (ps.last_token == rparen || ps.last_token == semicolon ||
335 ps.last_token == decl ||
336 ps.last_token == lbrace || ps.last_token == rbrace)) {
337 ps.its_a_keyword = true;
338 ps.last_u_d = true;
339 last_code = decl;
340 return decl;
341 }
342 if (last_code == decl) /* if this is a declared variable, then
343 * following sign is unary */
344 ps.last_u_d = true; /* will make "int a -1" work */
345 last_code = ident;
346 return (ident); /* the ident is not in the list */
347 } /* end of procesing for alpanum character */
348
349 /* Scan a non-alphanumeric token */
350
351 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
352 * moved here */
353 *e_token = '\0';
354 if (++buf_ptr >= buf_end)
355 fill_buffer();
356
357 switch (*token) {
358 case '\n':
359 unary_delim = ps.last_u_d;
360 ps.last_nl = true; /* remember that we just had a newline */
361 code = (had_eof ? 0 : newline);
362
363 /*
364 * if data has been exhausted, the newline is a dummy, and we should
365 * return code to stop
366 */
367 break;
368
369 case '\'': /* start of quoted character */
370 case '"': /* start of string */
371 qchar = *token;
372 if (troff) {
373 e_token[-1] = '`';
374 if (qchar == '"')
375 *e_token++ = '`';
376 e_token = chfont(&bodyf, &stringf, e_token);
377 }
378 do { /* copy the string */
379 while (1) { /* move one character or [/<char>]<char> */
380 if (*buf_ptr == '\n') {
381 printf("%d: Unterminated literal\n", line_no);
382 goto stop_lit;
383 }
384 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
385 * since CHECK_SIZE guarantees that there
386 * are at least 5 entries left */
387 *e_token = *buf_ptr++;
388 if (buf_ptr >= buf_end)
389 fill_buffer();
390 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
391 if (*buf_ptr == '\n') /* check for escaped newline */
392 ++line_no;
393 if (troff) {
394 *++e_token = BACKSLASH;
395 if (*buf_ptr == BACKSLASH)
396 *++e_token = BACKSLASH;
397 }
398 *++e_token = *buf_ptr++;
399 ++e_token; /* we must increment this again because we
400 * copied two chars */
401 if (buf_ptr >= buf_end)
402 fill_buffer();
403 }
404 else
405 break; /* we copied one character */
406 } /* end of while (1) */
407 } while (*e_token++ != qchar);
408 if (troff) {
409 e_token = chfont(&stringf, &bodyf, e_token - 1);
410 if (qchar == '"')
411 *e_token++ = '\'';
412 }
413stop_lit:
414 code = ident;
415 break;
416
417 case ('('):
418 case ('['):
419 unary_delim = true;
420 code = lparen;
421 break;
422
423 case (')'):
424 case (']'):
425 code = rparen;
426 break;
427
428 case '#':
429 unary_delim = ps.last_u_d;
430 code = preesc;
431 break;
432
433 case '?':
434 unary_delim = true;
435 code = question;
436 break;
437
438 case (':'):
439 code = colon;
440 unary_delim = true;
441 break;
442
443 case (';'):
444 unary_delim = true;
445 code = semicolon;
446 break;
447
448 case ('{'):
449 unary_delim = true;
450
451 /*
452 * if (ps.in_or_st) ps.block_init = 1;
453 */
454 /* ? code = ps.block_init ? lparen : lbrace; */
455 code = lbrace;
456 break;
457
458 case ('}'):
459 unary_delim = true;
460 /* ? code = ps.block_init ? rparen : rbrace; */
461 code = rbrace;
462 break;
463
464 case 014: /* a form feed */
465 unary_delim = ps.last_u_d;
466 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
467 * right */
468 code = form_feed;
469 break;
470
471 case (','):
472 unary_delim = true;
473 code = comma;
474 break;
475
476 case '.':
477 unary_delim = false;
478 code = period;
479 break;
480
481 case '-':
482 case '+': /* check for -, +, --, ++ */
483 code = (ps.last_u_d ? unary_op : binary_op);
484 unary_delim = true;
485
486 if (*buf_ptr == token[0]) {
487 /* check for doubled character */
488 *e_token++ = *buf_ptr++;
489 /* buffer overflow will be checked at end of loop */
490 if (last_code == ident || last_code == rparen) {
491 code = (ps.last_u_d ? unary_op : postop);
492 /* check for following ++ or -- */
493 unary_delim = false;
494 }
495 }
496 else if (*buf_ptr == '=')
497 /* check for operator += */
498 *e_token++ = *buf_ptr++;
499 else if (*buf_ptr == '>') {
500 /* check for operator -> */
501 *e_token++ = *buf_ptr++;
502 if (!pointer_as_binop) {
503 unary_delim = false;
504 code = unary_op;
505 ps.want_blank = false;
506 }
507 }
508 break; /* buffer overflow will be checked at end of
509 * switch */
510
511 case '=':
512 if (ps.in_or_st)
513 ps.block_init = 1;
514#ifdef undef
515 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
516 e_token[-1] = *buf_ptr++;
517 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
518 *e_token++ = *buf_ptr++;
519 *e_token++ = '='; /* Flip =+ to += */
520 *e_token = 0;
521 }
522#else
523 if (*buf_ptr == '=') {/* == */
524 *e_token++ = '='; /* Flip =+ to += */
525 buf_ptr++;
526 *e_token = 0;
527 }
528#endif
529 code = binary_op;
530 unary_delim = true;
531 break;
532 /* can drop thru!!! */
533
534 case '>':
535 case '<':
536 case '!': /* ops like <, <<, <=, !=, etc */
537 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
538 *e_token++ = *buf_ptr;
539 if (++buf_ptr >= buf_end)
540 fill_buffer();
541 }
542 if (*buf_ptr == '=')
543 *e_token++ = *buf_ptr++;
544 code = (ps.last_u_d ? unary_op : binary_op);
545 unary_delim = true;
546 break;
547
548 default:
549 if (token[0] == '/' && *buf_ptr == '*') {
550 /* it is start of comment */
551 *e_token++ = '*';
552
553 if (++buf_ptr >= buf_end)
554 fill_buffer();
555
556 code = comment;
557 unary_delim = ps.last_u_d;
558 break;
559 }
560 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
561 /*
562 * handle ||, &&, etc, and also things as in int *****i
563 */
564 *e_token++ = *buf_ptr;
565 if (++buf_ptr >= buf_end)
566 fill_buffer();
567 }
568 code = (ps.last_u_d ? unary_op : binary_op);
569 unary_delim = true;
570
571
572 } /* end of switch */
573 if (code != newline) {
574 l_struct = false;
575 last_code = code;
576 }
577 if (buf_ptr >= buf_end) /* check for input buffer empty */
578 fill_buffer();
579 ps.last_u_d = unary_delim;
580 *e_token = '\0'; /* null terminate the token */
581 return (code);
582}
583
584/*
585 * Add the given keyword to the keyword table, using val as the keyword type
586 */
587void
588addkey(char *key, int val)
589{
590 struct templ *p = specials;
591 while (p->rwd)
592 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
593 return;
594 else
595 p++;
596 if (p >= specials + sizeof specials / sizeof specials[0])
597 return; /* For now, table overflows are silently
598 * ignored */
599 p->rwd = key;
600 p->rwcode = val;
601 p[1].rwd = 0;
602 p[1].rwcode = 0;
603}
286 }
287 last_code = decl;
288 return (decl);
289
290 case 5: /* if, while, for */
291 return (sp_paren);
292
293 case 6: /* do, else */
294 return (sp_nparen);
295
296 case 7:
297 ps.sizeof_keyword = true;
298 default: /* all others are treated like any other
299 * identifier */
300 return (ident);
301 } /* end of switch */
302 } /* end of if (found_it) */
303 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
304 char *tp = buf_ptr;
305 while (tp < buf_end)
306 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
307 goto not_proc;
308 strncpy(ps.procname, token, sizeof ps.procname - 1);
309 ps.in_parameter_declaration = 1;
310 rparen_count = 1;
311 not_proc:;
312 }
313 /*
314 * The following hack attempts to guess whether or not the current
315 * token is in fact a declaration keyword -- one that has been
316 * typedefd
317 */
318 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
319 && !ps.p_l_follow
320 && !ps.block_init
321 && (ps.last_token == rparen || ps.last_token == semicolon ||
322 ps.last_token == decl ||
323 ps.last_token == lbrace || ps.last_token == rbrace)) {
324 ps.its_a_keyword = true;
325 ps.last_u_d = true;
326 last_code = decl;
327 return decl;
328 }
329 if (last_code == decl) /* if this is a declared variable, then
330 * following sign is unary */
331 ps.last_u_d = true; /* will make "int a -1" work */
332 last_code = ident;
333 return (ident); /* the ident is not in the list */
334 } /* end of procesing for alpanum character */
335
336 /* Scan a non-alphanumeric token */
337
338 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
339 * moved here */
340 *e_token = '\0';
341 if (++buf_ptr >= buf_end)
342 fill_buffer();
343
344 switch (*token) {
345 case '\n':
346 unary_delim = ps.last_u_d;
347 ps.last_nl = true; /* remember that we just had a newline */
348 code = (had_eof ? 0 : newline);
349
350 /*
351 * if data has been exhausted, the newline is a dummy, and we should
352 * return code to stop
353 */
354 break;
355
356 case '\'': /* start of quoted character */
357 case '"': /* start of string */
358 qchar = *token;
359 if (troff) {
360 e_token[-1] = '`';
361 if (qchar == '"')
362 *e_token++ = '`';
363 e_token = chfont(&bodyf, &stringf, e_token);
364 }
365 do { /* copy the string */
366 while (1) { /* move one character or [/<char>]<char> */
367 if (*buf_ptr == '\n') {
368 printf("%d: Unterminated literal\n", line_no);
369 goto stop_lit;
370 }
371 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
372 * since CHECK_SIZE guarantees that there
373 * are at least 5 entries left */
374 *e_token = *buf_ptr++;
375 if (buf_ptr >= buf_end)
376 fill_buffer();
377 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
378 if (*buf_ptr == '\n') /* check for escaped newline */
379 ++line_no;
380 if (troff) {
381 *++e_token = BACKSLASH;
382 if (*buf_ptr == BACKSLASH)
383 *++e_token = BACKSLASH;
384 }
385 *++e_token = *buf_ptr++;
386 ++e_token; /* we must increment this again because we
387 * copied two chars */
388 if (buf_ptr >= buf_end)
389 fill_buffer();
390 }
391 else
392 break; /* we copied one character */
393 } /* end of while (1) */
394 } while (*e_token++ != qchar);
395 if (troff) {
396 e_token = chfont(&stringf, &bodyf, e_token - 1);
397 if (qchar == '"')
398 *e_token++ = '\'';
399 }
400stop_lit:
401 code = ident;
402 break;
403
404 case ('('):
405 case ('['):
406 unary_delim = true;
407 code = lparen;
408 break;
409
410 case (')'):
411 case (']'):
412 code = rparen;
413 break;
414
415 case '#':
416 unary_delim = ps.last_u_d;
417 code = preesc;
418 break;
419
420 case '?':
421 unary_delim = true;
422 code = question;
423 break;
424
425 case (':'):
426 code = colon;
427 unary_delim = true;
428 break;
429
430 case (';'):
431 unary_delim = true;
432 code = semicolon;
433 break;
434
435 case ('{'):
436 unary_delim = true;
437
438 /*
439 * if (ps.in_or_st) ps.block_init = 1;
440 */
441 /* ? code = ps.block_init ? lparen : lbrace; */
442 code = lbrace;
443 break;
444
445 case ('}'):
446 unary_delim = true;
447 /* ? code = ps.block_init ? rparen : rbrace; */
448 code = rbrace;
449 break;
450
451 case 014: /* a form feed */
452 unary_delim = ps.last_u_d;
453 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
454 * right */
455 code = form_feed;
456 break;
457
458 case (','):
459 unary_delim = true;
460 code = comma;
461 break;
462
463 case '.':
464 unary_delim = false;
465 code = period;
466 break;
467
468 case '-':
469 case '+': /* check for -, +, --, ++ */
470 code = (ps.last_u_d ? unary_op : binary_op);
471 unary_delim = true;
472
473 if (*buf_ptr == token[0]) {
474 /* check for doubled character */
475 *e_token++ = *buf_ptr++;
476 /* buffer overflow will be checked at end of loop */
477 if (last_code == ident || last_code == rparen) {
478 code = (ps.last_u_d ? unary_op : postop);
479 /* check for following ++ or -- */
480 unary_delim = false;
481 }
482 }
483 else if (*buf_ptr == '=')
484 /* check for operator += */
485 *e_token++ = *buf_ptr++;
486 else if (*buf_ptr == '>') {
487 /* check for operator -> */
488 *e_token++ = *buf_ptr++;
489 if (!pointer_as_binop) {
490 unary_delim = false;
491 code = unary_op;
492 ps.want_blank = false;
493 }
494 }
495 break; /* buffer overflow will be checked at end of
496 * switch */
497
498 case '=':
499 if (ps.in_or_st)
500 ps.block_init = 1;
501#ifdef undef
502 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
503 e_token[-1] = *buf_ptr++;
504 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
505 *e_token++ = *buf_ptr++;
506 *e_token++ = '='; /* Flip =+ to += */
507 *e_token = 0;
508 }
509#else
510 if (*buf_ptr == '=') {/* == */
511 *e_token++ = '='; /* Flip =+ to += */
512 buf_ptr++;
513 *e_token = 0;
514 }
515#endif
516 code = binary_op;
517 unary_delim = true;
518 break;
519 /* can drop thru!!! */
520
521 case '>':
522 case '<':
523 case '!': /* ops like <, <<, <=, !=, etc */
524 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
525 *e_token++ = *buf_ptr;
526 if (++buf_ptr >= buf_end)
527 fill_buffer();
528 }
529 if (*buf_ptr == '=')
530 *e_token++ = *buf_ptr++;
531 code = (ps.last_u_d ? unary_op : binary_op);
532 unary_delim = true;
533 break;
534
535 default:
536 if (token[0] == '/' && *buf_ptr == '*') {
537 /* it is start of comment */
538 *e_token++ = '*';
539
540 if (++buf_ptr >= buf_end)
541 fill_buffer();
542
543 code = comment;
544 unary_delim = ps.last_u_d;
545 break;
546 }
547 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
548 /*
549 * handle ||, &&, etc, and also things as in int *****i
550 */
551 *e_token++ = *buf_ptr;
552 if (++buf_ptr >= buf_end)
553 fill_buffer();
554 }
555 code = (ps.last_u_d ? unary_op : binary_op);
556 unary_delim = true;
557
558
559 } /* end of switch */
560 if (code != newline) {
561 l_struct = false;
562 last_code = code;
563 }
564 if (buf_ptr >= buf_end) /* check for input buffer empty */
565 fill_buffer();
566 ps.last_u_d = unary_delim;
567 *e_token = '\0'; /* null terminate the token */
568 return (code);
569}
570
571/*
572 * Add the given keyword to the keyword table, using val as the keyword type
573 */
574void
575addkey(char *key, int val)
576{
577 struct templ *p = specials;
578 while (p->rwd)
579 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
580 return;
581 else
582 p++;
583 if (p >= specials + sizeof specials / sizeof specials[0])
584 return; /* For now, table overflows are silently
585 * ignored */
586 p->rwd = key;
587 p->rwcode = val;
588 p[1].rwd = 0;
589 p[1].rwcode = 0;
590}