Deleted Added
full compact
lexi.c (93440) lexi.c (98771)
1/*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#if 0
37#ifndef lint
38static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
39static const char rcsid[] =
1/*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#if 0
37#ifndef lint
38static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
39static const char rcsid[] =
40 "$FreeBSD: head/usr.bin/indent/lexi.c 93440 2002-03-30 17:10:20Z dwmalone $";
40 "$FreeBSD: head/usr.bin/indent/lexi.c 98771 2002-06-24 17:40:27Z jmallett $";
41#endif /* not lint */
42#endif
43
44/*
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
47 * of token scanned.
48 */
49
50#include <stdio.h>
51#include <ctype.h>
52#include <stdlib.h>
53#include <string.h>
54#include "indent_globs.h"
55#include "indent_codes.h"
56#include "indent.h"
57
58#define alphanum 1
59#define opchar 3
60
61void fill_buffer(void);
62
/*
 * One entry of the reserved-word table: the spelling of a keyword and the
 * class code that lexi() switches on when the scanned token matches it.
 */
struct templ {
	const char *rwd;	/* keyword text; a null pointer ends the table */
	int rwcode;		/* keyword class, see the table comment below */
};

/*
 * Reserved words recognized by the scanner.  The class codes are
 * interpreted by the switch in lexi():
 *   1  switch                      2  case, default
 *   3  struct, union, enum         4  declaration keywords
 *   5  if, while, for              6  do, else
 *   7  sizeof
 * Any other code (0 and 9 in this table) ends up in the switch's default
 * arm and is returned as an ordinary identifier.
 *
 * The array is deliberately larger than the built-in list so that addkey()
 * can append entries at run time; the list ends at the first entry whose
 * rwd is null.
 */
struct templ specials[1000] =
{
	{"switch", 1},
	{"case", 2},
	{"break", 0},
	{"struct", 3},
	{"union", 3},
	{"enum", 3},
	{"default", 2},
	{"int", 4},
	{"char", 4},
	{"float", 4},
	{"double", 4},
	{"long", 4},
	{"short", 4},
	{"typedef", 4},		/* was misspelled "typdef", so it never matched */
	{"unsigned", 4},
	{"register", 4},
	{"static", 4},
	{"global", 4},
	{"extern", 4},
	{"void", 4},
	{"goto", 0},
	{"return", 0},
	{"if", 5},
	{"while", 5},
	{"for", 5},
	{"else", 6},
	{"do", 6},
	{"sizeof", 7},
	{"const", 9},
	{"volatile", 9},
	{0, 0}
};
102
/*
 * Character classification table used by the token scanner, indexed by
 * 7-bit ASCII code.  An entry of 1 marks a character that can be part of an
 * identifier or number, 3 marks an operator character, and 0 marks
 * everything else.
 */
char chartype[128] = {
	/* 0x00 - 0x0f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . slash */
	0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
	/* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
	/* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5f: P Q R S T U V W X Y Z [ backslash ] ^ _ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
	/* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
124
125int
126lexi(void)
127{
128 int unary_delim; /* this is set to 1 if the current token
129 * forces a following operator to be unary */
130 static int last_code; /* the last token type returned */
131 static int l_struct; /* set to 1 if the last token was 'struct' */
132 int code; /* internal code to be returned */
133 char qchar; /* the delimiter character for a string */
134
135 e_token = s_token; /* point to start of place to save token */
136 unary_delim = false;
137 ps.col_1 = ps.last_nl; /* tell world that this token started in
138 * column 1 iff the last thing scanned was nl */
139 ps.last_nl = false;
140
141 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
142 ps.col_1 = false; /* leading blanks imply token is not in column
143 * 1 */
144 if (++buf_ptr >= buf_end)
145 fill_buffer();
146 }
147
148 /* Scan an alphanumeric token */
149 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
150 /*
151 * we have a character or number
152 */
153 const char *j; /* used for searching thru list of
154 *
155 * reserved words */
41#endif /* not lint */
42#endif
43
44/*
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
47 * of token scanned.
48 */
49
50#include <stdio.h>
51#include <ctype.h>
52#include <stdlib.h>
53#include <string.h>
54#include "indent_globs.h"
55#include "indent_codes.h"
56#include "indent.h"
57
58#define alphanum 1
59#define opchar 3
60
61void fill_buffer(void);
62
/*
 * One entry of the reserved-word table: the spelling of a keyword and the
 * class code that lexi() switches on when the scanned token matches it.
 */
struct templ {
	const char *rwd;	/* keyword text; a null pointer ends the table */
	int rwcode;		/* keyword class, see the table comment below */
};

/*
 * Reserved words recognized by the scanner.  The class codes are
 * interpreted by the switch in lexi():
 *   1  switch                      2  case, default
 *   3  struct, union, enum         4  declaration keywords
 *   5  if, while, for              6  do, else
 *   7  sizeof
 * Any other code (0 and 9 in this table) ends up in the switch's default
 * arm and is returned as an ordinary identifier.
 *
 * The array is deliberately larger than the built-in list so that addkey()
 * can append entries at run time; the list ends at the first entry whose
 * rwd is null.
 */
struct templ specials[1000] =
{
	{"switch", 1},
	{"case", 2},
	{"break", 0},
	{"struct", 3},
	{"union", 3},
	{"enum", 3},
	{"default", 2},
	{"int", 4},
	{"char", 4},
	{"float", 4},
	{"double", 4},
	{"long", 4},
	{"short", 4},
	{"typedef", 4},		/* was misspelled "typdef", so it never matched */
	{"unsigned", 4},
	{"register", 4},
	{"static", 4},
	{"global", 4},
	{"extern", 4},
	{"void", 4},
	{"goto", 0},
	{"return", 0},
	{"if", 5},
	{"while", 5},
	{"for", 5},
	{"else", 6},
	{"do", 6},
	{"sizeof", 7},
	{"const", 9},
	{"volatile", 9},
	{0, 0}
};
102
/*
 * Character classification table used by the token scanner, indexed by
 * 7-bit ASCII code.  An entry of 1 marks a character that can be part of an
 * identifier or number, 3 marks an operator character, and 0 marks
 * everything else.
 */
char chartype[128] = {
	/* 0x00 - 0x0f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . slash */
	0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
	/* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
	/* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5f: P Q R S T U V W X Y Z [ backslash ] ^ _ */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
	/* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
124
125int
126lexi(void)
127{
128 int unary_delim; /* this is set to 1 if the current token
129 * forces a following operator to be unary */
130 static int last_code; /* the last token type returned */
131 static int l_struct; /* set to 1 if the last token was 'struct' */
132 int code; /* internal code to be returned */
133 char qchar; /* the delimiter character for a string */
134
135 e_token = s_token; /* point to start of place to save token */
136 unary_delim = false;
137 ps.col_1 = ps.last_nl; /* tell world that this token started in
138 * column 1 iff the last thing scanned was nl */
139 ps.last_nl = false;
140
141 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
142 ps.col_1 = false; /* leading blanks imply token is not in column
143 * 1 */
144 if (++buf_ptr >= buf_end)
145 fill_buffer();
146 }
147
148 /* Scan an alphanumeric token */
149 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
150 /*
151 * we have a character or number
152 */
153 const char *j; /* used for searching thru list of
154 *
155 * reserved words */
156 register struct templ *p;
156 struct templ *p;
157
158 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
159 int seendot = 0,
160 seenexp = 0,
161 seensfx = 0;
162 if (*buf_ptr == '0' &&
163 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
164 *e_token++ = *buf_ptr++;
165 *e_token++ = *buf_ptr++;
166 while (isxdigit(*buf_ptr)) {
167 CHECK_SIZE_TOKEN;
168 *e_token++ = *buf_ptr++;
169 }
170 }
171 else
172 while (1) {
173 if (*buf_ptr == '.') {
174 if (seendot)
175 break;
176 else
177 seendot++;
178 }
179 CHECK_SIZE_TOKEN;
180 *e_token++ = *buf_ptr++;
181 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
182 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
183 break;
184 else {
185 seenexp++;
186 seendot++;
187 CHECK_SIZE_TOKEN;
188 *e_token++ = *buf_ptr++;
189 if (*buf_ptr == '+' || *buf_ptr == '-')
190 *e_token++ = *buf_ptr++;
191 }
192 }
193 }
194 while (1) {
195 if (!(seensfx & 1) &&
196 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
197 CHECK_SIZE_TOKEN;
198 *e_token++ = *buf_ptr++;
199 seensfx |= 1;
200 continue;
201 }
202 if (!(seensfx & 2) &&
203 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
204 CHECK_SIZE_TOKEN;
205 if (buf_ptr[1] == buf_ptr[0])
206 *e_token++ = *buf_ptr++;
207 *e_token++ = *buf_ptr++;
208 seensfx |= 2;
209 continue;
210 }
211 break;
212 }
213 }
214 else
215 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
216 /* fill_buffer() terminates buffer with newline */
217 if (*buf_ptr == BACKSLASH) {
218 if (*(buf_ptr + 1) == '\n') {
219 buf_ptr += 2;
220 if (buf_ptr >= buf_end)
221 fill_buffer();
222 } else
223 break;
224 }
225 CHECK_SIZE_TOKEN;
226 /* copy it over */
227 *e_token++ = *buf_ptr++;
228 if (buf_ptr >= buf_end)
229 fill_buffer();
230 }
231 *e_token++ = '\0';
232 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
233 if (++buf_ptr >= buf_end)
234 fill_buffer();
235 }
236 ps.its_a_keyword = false;
237 ps.sizeof_keyword = false;
238 if (l_struct) { /* if last token was 'struct', then this token
239 * should be treated as a declaration */
240 l_struct = false;
241 last_code = ident;
242 ps.last_u_d = true;
243 return (decl);
244 }
245 ps.last_u_d = false; /* Operator after indentifier is binary */
246 last_code = ident; /* Remember that this is the code we will
247 * return */
248
249 /*
250 * This loop will check if the token is a keyword.
251 */
252 for (p = specials; (j = p->rwd) != 0; p++) {
253 const char *q = s_token; /* point at scanned token */
254 if (*j++ != *q++ || *j++ != *q++)
255 continue; /* This test depends on the fact that
256 * identifiers are always at least 1 character
257 * long (ie. the first two bytes of the
258 * identifier are always meaningful) */
259 if (q[-1] == 0)
260 break; /* If its a one-character identifier */
261 while (*q++ == *j)
262 if (*j++ == 0)
263 goto found_keyword; /* I wish that C had a multi-level
264 * break... */
265 }
266 if (p->rwd) { /* we have a keyword */
267 found_keyword:
268 ps.its_a_keyword = true;
269 ps.last_u_d = true;
270 switch (p->rwcode) {
271 case 1: /* it is a switch */
272 return (swstmt);
273 case 2: /* a case or default */
274 return (casestmt);
275
276 case 3: /* a "struct" */
277 if (ps.p_l_follow)
278 break; /* inside parens: cast */
279 /*
280 * Next time around, we may want to know that we have had a
281 * 'struct'
282 */
283 l_struct = true;
284
285 /*
286 * Fall through to test for a cast, function prototype or
287 * sizeof().
288 */
289 case 4: /* one of the declaration keywords */
290 if (ps.p_l_follow) {
291 ps.cast_mask |= 1 << ps.p_l_follow;
292
293 /*
294 * Forget that we saw `struct' if we're in a sizeof().
295 */
296 if (ps.sizeof_mask)
297 l_struct = false;
298
299 break; /* inside parens: cast, prototype or sizeof() */
300 }
301 last_code = decl;
302 return (decl);
303
304 case 5: /* if, while, for */
305 return (sp_paren);
306
307 case 6: /* do, else */
308 return (sp_nparen);
309
310 case 7:
311 ps.sizeof_keyword = true;
312 default: /* all others are treated like any other
313 * identifier */
314 return (ident);
315 } /* end of switch */
316 } /* end of if (found_it) */
317 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
157
158 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
159 int seendot = 0,
160 seenexp = 0,
161 seensfx = 0;
162 if (*buf_ptr == '0' &&
163 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
164 *e_token++ = *buf_ptr++;
165 *e_token++ = *buf_ptr++;
166 while (isxdigit(*buf_ptr)) {
167 CHECK_SIZE_TOKEN;
168 *e_token++ = *buf_ptr++;
169 }
170 }
171 else
172 while (1) {
173 if (*buf_ptr == '.') {
174 if (seendot)
175 break;
176 else
177 seendot++;
178 }
179 CHECK_SIZE_TOKEN;
180 *e_token++ = *buf_ptr++;
181 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
182 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
183 break;
184 else {
185 seenexp++;
186 seendot++;
187 CHECK_SIZE_TOKEN;
188 *e_token++ = *buf_ptr++;
189 if (*buf_ptr == '+' || *buf_ptr == '-')
190 *e_token++ = *buf_ptr++;
191 }
192 }
193 }
194 while (1) {
195 if (!(seensfx & 1) &&
196 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
197 CHECK_SIZE_TOKEN;
198 *e_token++ = *buf_ptr++;
199 seensfx |= 1;
200 continue;
201 }
202 if (!(seensfx & 2) &&
203 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
204 CHECK_SIZE_TOKEN;
205 if (buf_ptr[1] == buf_ptr[0])
206 *e_token++ = *buf_ptr++;
207 *e_token++ = *buf_ptr++;
208 seensfx |= 2;
209 continue;
210 }
211 break;
212 }
213 }
214 else
215 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
216 /* fill_buffer() terminates buffer with newline */
217 if (*buf_ptr == BACKSLASH) {
218 if (*(buf_ptr + 1) == '\n') {
219 buf_ptr += 2;
220 if (buf_ptr >= buf_end)
221 fill_buffer();
222 } else
223 break;
224 }
225 CHECK_SIZE_TOKEN;
226 /* copy it over */
227 *e_token++ = *buf_ptr++;
228 if (buf_ptr >= buf_end)
229 fill_buffer();
230 }
231 *e_token++ = '\0';
232 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
233 if (++buf_ptr >= buf_end)
234 fill_buffer();
235 }
236 ps.its_a_keyword = false;
237 ps.sizeof_keyword = false;
238 if (l_struct) { /* if last token was 'struct', then this token
239 * should be treated as a declaration */
240 l_struct = false;
241 last_code = ident;
242 ps.last_u_d = true;
243 return (decl);
244 }
245 ps.last_u_d = false; /* Operator after indentifier is binary */
246 last_code = ident; /* Remember that this is the code we will
247 * return */
248
249 /*
250 * This loop will check if the token is a keyword.
251 */
252 for (p = specials; (j = p->rwd) != 0; p++) {
253 const char *q = s_token; /* point at scanned token */
254 if (*j++ != *q++ || *j++ != *q++)
255 continue; /* This test depends on the fact that
256 * identifiers are always at least 1 character
257 * long (ie. the first two bytes of the
258 * identifier are always meaningful) */
259 if (q[-1] == 0)
260 break; /* If its a one-character identifier */
261 while (*q++ == *j)
262 if (*j++ == 0)
263 goto found_keyword; /* I wish that C had a multi-level
264 * break... */
265 }
266 if (p->rwd) { /* we have a keyword */
267 found_keyword:
268 ps.its_a_keyword = true;
269 ps.last_u_d = true;
270 switch (p->rwcode) {
271 case 1: /* it is a switch */
272 return (swstmt);
273 case 2: /* a case or default */
274 return (casestmt);
275
276 case 3: /* a "struct" */
277 if (ps.p_l_follow)
278 break; /* inside parens: cast */
279 /*
280 * Next time around, we may want to know that we have had a
281 * 'struct'
282 */
283 l_struct = true;
284
285 /*
286 * Fall through to test for a cast, function prototype or
287 * sizeof().
288 */
289 case 4: /* one of the declaration keywords */
290 if (ps.p_l_follow) {
291 ps.cast_mask |= 1 << ps.p_l_follow;
292
293 /*
294 * Forget that we saw `struct' if we're in a sizeof().
295 */
296 if (ps.sizeof_mask)
297 l_struct = false;
298
299 break; /* inside parens: cast, prototype or sizeof() */
300 }
301 last_code = decl;
302 return (decl);
303
304 case 5: /* if, while, for */
305 return (sp_paren);
306
307 case 6: /* do, else */
308 return (sp_nparen);
309
310 case 7:
311 ps.sizeof_keyword = true;
312 default: /* all others are treated like any other
313 * identifier */
314 return (ident);
315 } /* end of switch */
316 } /* end of if (found_it) */
317 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
318 register char *tp = buf_ptr;
318 char *tp = buf_ptr;
319 while (tp < buf_end)
320 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
321 goto not_proc;
322 strncpy(ps.procname, token, sizeof ps.procname - 1);
323 ps.in_parameter_declaration = 1;
324 rparen_count = 1;
325 not_proc:;
326 }
327 /*
328 * The following hack attempts to guess whether or not the current
329 * token is in fact a declaration keyword -- one that has been
330 * typedefd
331 */
332 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
333 && !ps.p_l_follow
334 && !ps.block_init
335 && (ps.last_token == rparen || ps.last_token == semicolon ||
336 ps.last_token == decl ||
337 ps.last_token == lbrace || ps.last_token == rbrace)) {
338 ps.its_a_keyword = true;
339 ps.last_u_d = true;
340 last_code = decl;
341 return decl;
342 }
343 if (last_code == decl) /* if this is a declared variable, then
344 * following sign is unary */
345 ps.last_u_d = true; /* will make "int a -1" work */
346 last_code = ident;
347 return (ident); /* the ident is not in the list */
348 } /* end of procesing for alpanum character */
349
350 /* Scan a non-alphanumeric token */
351
352 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
353 * moved here */
354 *e_token = '\0';
355 if (++buf_ptr >= buf_end)
356 fill_buffer();
357
358 switch (*token) {
359 case '\n':
360 unary_delim = ps.last_u_d;
361 ps.last_nl = true; /* remember that we just had a newline */
362 code = (had_eof ? 0 : newline);
363
364 /*
365 * if data has been exausted, the newline is a dummy, and we should
366 * return code to stop
367 */
368 break;
369
370 case '\'': /* start of quoted character */
371 case '"': /* start of string */
372 qchar = *token;
373 if (troff) {
374 e_token[-1] = '`';
375 if (qchar == '"')
376 *e_token++ = '`';
377 e_token = chfont(&bodyf, &stringf, e_token);
378 }
379 do { /* copy the string */
380 while (1) { /* move one character or [/<char>]<char> */
381 if (*buf_ptr == '\n') {
382 printf("%d: Unterminated literal\n", line_no);
383 goto stop_lit;
384 }
385 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
386 * since CHECK_SIZE guarantees that there
387 * are at least 5 entries left */
388 *e_token = *buf_ptr++;
389 if (buf_ptr >= buf_end)
390 fill_buffer();
391 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
392 if (*buf_ptr == '\n') /* check for escaped newline */
393 ++line_no;
394 if (troff) {
395 *++e_token = BACKSLASH;
396 if (*buf_ptr == BACKSLASH)
397 *++e_token = BACKSLASH;
398 }
399 *++e_token = *buf_ptr++;
400 ++e_token; /* we must increment this again because we
401 * copied two chars */
402 if (buf_ptr >= buf_end)
403 fill_buffer();
404 }
405 else
406 break; /* we copied one character */
407 } /* end of while (1) */
408 } while (*e_token++ != qchar);
409 if (troff) {
410 e_token = chfont(&stringf, &bodyf, e_token - 1);
411 if (qchar == '"')
412 *e_token++ = '\'';
413 }
414stop_lit:
415 code = ident;
416 break;
417
418 case ('('):
419 case ('['):
420 unary_delim = true;
421 code = lparen;
422 break;
423
424 case (')'):
425 case (']'):
426 code = rparen;
427 break;
428
429 case '#':
430 unary_delim = ps.last_u_d;
431 code = preesc;
432 break;
433
434 case '?':
435 unary_delim = true;
436 code = question;
437 break;
438
439 case (':'):
440 code = colon;
441 unary_delim = true;
442 break;
443
444 case (';'):
445 unary_delim = true;
446 code = semicolon;
447 break;
448
449 case ('{'):
450 unary_delim = true;
451
452 /*
453 * if (ps.in_or_st) ps.block_init = 1;
454 */
455 /* ? code = ps.block_init ? lparen : lbrace; */
456 code = lbrace;
457 break;
458
459 case ('}'):
460 unary_delim = true;
461 /* ? code = ps.block_init ? rparen : rbrace; */
462 code = rbrace;
463 break;
464
465 case 014: /* a form feed */
466 unary_delim = ps.last_u_d;
467 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
468 * right */
469 code = form_feed;
470 break;
471
472 case (','):
473 unary_delim = true;
474 code = comma;
475 break;
476
477 case '.':
478 unary_delim = false;
479 code = period;
480 break;
481
482 case '-':
483 case '+': /* check for -, +, --, ++ */
484 code = (ps.last_u_d ? unary_op : binary_op);
485 unary_delim = true;
486
487 if (*buf_ptr == token[0]) {
488 /* check for doubled character */
489 *e_token++ = *buf_ptr++;
490 /* buffer overflow will be checked at end of loop */
491 if (last_code == ident || last_code == rparen) {
492 code = (ps.last_u_d ? unary_op : postop);
493 /* check for following ++ or -- */
494 unary_delim = false;
495 }
496 }
497 else if (*buf_ptr == '=')
498 /* check for operator += */
499 *e_token++ = *buf_ptr++;
500 else if (*buf_ptr == '>') {
501 /* check for operator -> */
502 *e_token++ = *buf_ptr++;
503 if (!pointer_as_binop) {
504 unary_delim = false;
505 code = unary_op;
506 ps.want_blank = false;
507 }
508 }
509 break; /* buffer overflow will be checked at end of
510 * switch */
511
512 case '=':
513 if (ps.in_or_st)
514 ps.block_init = 1;
515#ifdef undef
516 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
517 e_token[-1] = *buf_ptr++;
518 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
519 *e_token++ = *buf_ptr++;
520 *e_token++ = '='; /* Flip =+ to += */
521 *e_token = 0;
522 }
523#else
524 if (*buf_ptr == '=') {/* == */
525 *e_token++ = '='; /* Flip =+ to += */
526 buf_ptr++;
527 *e_token = 0;
528 }
529#endif
530 code = binary_op;
531 unary_delim = true;
532 break;
533 /* can drop thru!!! */
534
535 case '>':
536 case '<':
537 case '!': /* ops like <, <<, <=, !=, etc */
538 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
539 *e_token++ = *buf_ptr;
540 if (++buf_ptr >= buf_end)
541 fill_buffer();
542 }
543 if (*buf_ptr == '=')
544 *e_token++ = *buf_ptr++;
545 code = (ps.last_u_d ? unary_op : binary_op);
546 unary_delim = true;
547 break;
548
549 default:
550 if (token[0] == '/' && *buf_ptr == '*') {
551 /* it is start of comment */
552 *e_token++ = '*';
553
554 if (++buf_ptr >= buf_end)
555 fill_buffer();
556
557 code = comment;
558 unary_delim = ps.last_u_d;
559 break;
560 }
561 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
562 /*
563 * handle ||, &&, etc, and also things as in int *****i
564 */
565 *e_token++ = *buf_ptr;
566 if (++buf_ptr >= buf_end)
567 fill_buffer();
568 }
569 code = (ps.last_u_d ? unary_op : binary_op);
570 unary_delim = true;
571
572
573 } /* end of switch */
574 if (code != newline) {
575 l_struct = false;
576 last_code = code;
577 }
578 if (buf_ptr >= buf_end) /* check for input buffer empty */
579 fill_buffer();
580 ps.last_u_d = unary_delim;
581 *e_token = '\0'; /* null terminate the token */
582 return (code);
583}
584
585/*
586 * Add the given keyword to the keyword table, using val as the keyword type
587 */
588void
589addkey(char *key, int val)
590{
319 while (tp < buf_end)
320 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
321 goto not_proc;
322 strncpy(ps.procname, token, sizeof ps.procname - 1);
323 ps.in_parameter_declaration = 1;
324 rparen_count = 1;
325 not_proc:;
326 }
327 /*
328 * The following hack attempts to guess whether or not the current
329 * token is in fact a declaration keyword -- one that has been
330 * typedefd
331 */
332 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
333 && !ps.p_l_follow
334 && !ps.block_init
335 && (ps.last_token == rparen || ps.last_token == semicolon ||
336 ps.last_token == decl ||
337 ps.last_token == lbrace || ps.last_token == rbrace)) {
338 ps.its_a_keyword = true;
339 ps.last_u_d = true;
340 last_code = decl;
341 return decl;
342 }
343 if (last_code == decl) /* if this is a declared variable, then
344 * following sign is unary */
345 ps.last_u_d = true; /* will make "int a -1" work */
346 last_code = ident;
347 return (ident); /* the ident is not in the list */
348 } /* end of procesing for alpanum character */
349
350 /* Scan a non-alphanumeric token */
351
352 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
353 * moved here */
354 *e_token = '\0';
355 if (++buf_ptr >= buf_end)
356 fill_buffer();
357
358 switch (*token) {
359 case '\n':
360 unary_delim = ps.last_u_d;
361 ps.last_nl = true; /* remember that we just had a newline */
362 code = (had_eof ? 0 : newline);
363
364 /*
365 * if data has been exausted, the newline is a dummy, and we should
366 * return code to stop
367 */
368 break;
369
370 case '\'': /* start of quoted character */
371 case '"': /* start of string */
372 qchar = *token;
373 if (troff) {
374 e_token[-1] = '`';
375 if (qchar == '"')
376 *e_token++ = '`';
377 e_token = chfont(&bodyf, &stringf, e_token);
378 }
379 do { /* copy the string */
380 while (1) { /* move one character or [/<char>]<char> */
381 if (*buf_ptr == '\n') {
382 printf("%d: Unterminated literal\n", line_no);
383 goto stop_lit;
384 }
385 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
386 * since CHECK_SIZE guarantees that there
387 * are at least 5 entries left */
388 *e_token = *buf_ptr++;
389 if (buf_ptr >= buf_end)
390 fill_buffer();
391 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
392 if (*buf_ptr == '\n') /* check for escaped newline */
393 ++line_no;
394 if (troff) {
395 *++e_token = BACKSLASH;
396 if (*buf_ptr == BACKSLASH)
397 *++e_token = BACKSLASH;
398 }
399 *++e_token = *buf_ptr++;
400 ++e_token; /* we must increment this again because we
401 * copied two chars */
402 if (buf_ptr >= buf_end)
403 fill_buffer();
404 }
405 else
406 break; /* we copied one character */
407 } /* end of while (1) */
408 } while (*e_token++ != qchar);
409 if (troff) {
410 e_token = chfont(&stringf, &bodyf, e_token - 1);
411 if (qchar == '"')
412 *e_token++ = '\'';
413 }
414stop_lit:
415 code = ident;
416 break;
417
418 case ('('):
419 case ('['):
420 unary_delim = true;
421 code = lparen;
422 break;
423
424 case (')'):
425 case (']'):
426 code = rparen;
427 break;
428
429 case '#':
430 unary_delim = ps.last_u_d;
431 code = preesc;
432 break;
433
434 case '?':
435 unary_delim = true;
436 code = question;
437 break;
438
439 case (':'):
440 code = colon;
441 unary_delim = true;
442 break;
443
444 case (';'):
445 unary_delim = true;
446 code = semicolon;
447 break;
448
449 case ('{'):
450 unary_delim = true;
451
452 /*
453 * if (ps.in_or_st) ps.block_init = 1;
454 */
455 /* ? code = ps.block_init ? lparen : lbrace; */
456 code = lbrace;
457 break;
458
459 case ('}'):
460 unary_delim = true;
461 /* ? code = ps.block_init ? rparen : rbrace; */
462 code = rbrace;
463 break;
464
465 case 014: /* a form feed */
466 unary_delim = ps.last_u_d;
467 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
468 * right */
469 code = form_feed;
470 break;
471
472 case (','):
473 unary_delim = true;
474 code = comma;
475 break;
476
477 case '.':
478 unary_delim = false;
479 code = period;
480 break;
481
482 case '-':
483 case '+': /* check for -, +, --, ++ */
484 code = (ps.last_u_d ? unary_op : binary_op);
485 unary_delim = true;
486
487 if (*buf_ptr == token[0]) {
488 /* check for doubled character */
489 *e_token++ = *buf_ptr++;
490 /* buffer overflow will be checked at end of loop */
491 if (last_code == ident || last_code == rparen) {
492 code = (ps.last_u_d ? unary_op : postop);
493 /* check for following ++ or -- */
494 unary_delim = false;
495 }
496 }
497 else if (*buf_ptr == '=')
498 /* check for operator += */
499 *e_token++ = *buf_ptr++;
500 else if (*buf_ptr == '>') {
501 /* check for operator -> */
502 *e_token++ = *buf_ptr++;
503 if (!pointer_as_binop) {
504 unary_delim = false;
505 code = unary_op;
506 ps.want_blank = false;
507 }
508 }
509 break; /* buffer overflow will be checked at end of
510 * switch */
511
512 case '=':
513 if (ps.in_or_st)
514 ps.block_init = 1;
515#ifdef undef
516 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
517 e_token[-1] = *buf_ptr++;
518 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
519 *e_token++ = *buf_ptr++;
520 *e_token++ = '='; /* Flip =+ to += */
521 *e_token = 0;
522 }
523#else
524 if (*buf_ptr == '=') {/* == */
525 *e_token++ = '='; /* Flip =+ to += */
526 buf_ptr++;
527 *e_token = 0;
528 }
529#endif
530 code = binary_op;
531 unary_delim = true;
532 break;
533 /* can drop thru!!! */
534
535 case '>':
536 case '<':
537 case '!': /* ops like <, <<, <=, !=, etc */
538 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
539 *e_token++ = *buf_ptr;
540 if (++buf_ptr >= buf_end)
541 fill_buffer();
542 }
543 if (*buf_ptr == '=')
544 *e_token++ = *buf_ptr++;
545 code = (ps.last_u_d ? unary_op : binary_op);
546 unary_delim = true;
547 break;
548
549 default:
550 if (token[0] == '/' && *buf_ptr == '*') {
551 /* it is start of comment */
552 *e_token++ = '*';
553
554 if (++buf_ptr >= buf_end)
555 fill_buffer();
556
557 code = comment;
558 unary_delim = ps.last_u_d;
559 break;
560 }
561 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
562 /*
563 * handle ||, &&, etc, and also things as in int *****i
564 */
565 *e_token++ = *buf_ptr;
566 if (++buf_ptr >= buf_end)
567 fill_buffer();
568 }
569 code = (ps.last_u_d ? unary_op : binary_op);
570 unary_delim = true;
571
572
573 } /* end of switch */
574 if (code != newline) {
575 l_struct = false;
576 last_code = code;
577 }
578 if (buf_ptr >= buf_end) /* check for input buffer empty */
579 fill_buffer();
580 ps.last_u_d = unary_delim;
581 *e_token = '\0'; /* null terminate the token */
582 return (code);
583}
584
585/*
586 * Add the given keyword to the keyword table, using val as the keyword type
587 */
588void
589addkey(char *key, int val)
590{
591 register struct templ *p = specials;
591 struct templ *p = specials;
592 while (p->rwd)
593 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
594 return;
595 else
596 p++;
597 if (p >= specials + sizeof specials / sizeof specials[0])
598 return; /* For now, table overflows are silently
599 * ignored */
600 p->rwd = key;
601 p->rwcode = val;
602 p[1].rwd = 0;
603 p[1].rwcode = 0;
604}
592 while (p->rwd)
593 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
594 return;
595 else
596 p++;
597 if (p >= specials + sizeof specials / sizeof specials[0])
598 return; /* For now, table overflows are silently
599 * ignored */
600 p->rwd = key;
601 p->rwcode = val;
602 p[1].rwd = 0;
603 p[1].rwcode = 0;
604}