1/* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#if 0 37#ifndef lint 38static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39#endif /* not lint */ 40#endif 41#include <sys/cdefs.h>
| 1/* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#if 0 37#ifndef lint 38static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39#endif /* not lint */ 40#endif 41#include <sys/cdefs.h>
|
42__FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 116390 2003-06-15 09:28:17Z charnier $");
| 42__FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 125618 2004-02-09 12:52:15Z bde $");
|
43 44/* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 */ 49 50#include <err.h> 51#include <stdio.h> 52#include <ctype.h> 53#include <stdlib.h> 54#include <string.h> 55#include "indent_globs.h" 56#include "indent_codes.h" 57#include "indent.h" 58 59#define alphanum 1 60#define opchar 3 61 62struct templ { 63 const char *rwd; 64 int rwcode; 65}; 66
| 43 44/* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 */ 49 50#include <err.h> 51#include <stdio.h> 52#include <ctype.h> 53#include <stdlib.h> 54#include <string.h> 55#include "indent_globs.h" 56#include "indent_codes.h" 57#include "indent.h" 58 59#define alphanum 1 60#define opchar 3 61 62struct templ { 63 const char *rwd; 64 int rwcode; 65}; 66
|
67struct templ specials[1000] =
| 67struct templ specials[100] =
|
68{ 69 {"switch", 1}, 70 {"case", 2}, 71 {"break", 0}, 72 {"struct", 3}, 73 {"union", 3}, 74 {"enum", 3}, 75 {"default", 2}, 76 {"int", 4}, 77 {"char", 4}, 78 {"float", 4}, 79 {"double", 4}, 80 {"long", 4}, 81 {"short", 4}, 82 {"typdef", 4}, 83 {"unsigned", 4}, 84 {"register", 4}, 85 {"static", 4}, 86 {"global", 4}, 87 {"extern", 4}, 88 {"void", 4}, 89 {"goto", 0}, 90 {"return", 0}, 91 {"if", 5}, 92 {"while", 5}, 93 {"for", 5}, 94 {"else", 6}, 95 {"do", 6}, 96 {"sizeof", 7},
| 68{ 69 {"switch", 1}, 70 {"case", 2}, 71 {"break", 0}, 72 {"struct", 3}, 73 {"union", 3}, 74 {"enum", 3}, 75 {"default", 2}, 76 {"int", 4}, 77 {"char", 4}, 78 {"float", 4}, 79 {"double", 4}, 80 {"long", 4}, 81 {"short", 4}, 82 {"typdef", 4}, 83 {"unsigned", 4}, 84 {"register", 4}, 85 {"static", 4}, 86 {"global", 4}, 87 {"extern", 4}, 88 {"void", 4}, 89 {"goto", 0}, 90 {"return", 0}, 91 {"if", 5}, 92 {"while", 5}, 93 {"for", 5}, 94 {"else", 6}, 95 {"do", 6}, 96 {"sizeof", 7},
|
97 {"const", 9}, 98 {"volatile", 9},
| |
99 {0, 0} 100}; 101 102char chartype[128] = 103{ /* this is used to facilitate the decision of 104 * what type (alphanumeric, operator) each 105 * character is */ 106 0, 0, 0, 0, 0, 0, 0, 0, 107 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 0, 0, 0, 0, 0, 0, 0, 109 0, 0, 0, 0, 0, 0, 0, 0, 110 0, 3, 0, 0, 1, 3, 3, 0, 111 0, 0, 3, 3, 0, 3, 0, 3, 112 1, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 0, 0, 3, 3, 3, 3, 114 0, 1, 1, 1, 1, 1, 1, 1, 115 1, 1, 1, 1, 1, 1, 1, 1, 116 1, 1, 1, 1, 1, 1, 1, 1, 117 1, 1, 1, 0, 0, 0, 3, 1, 118 0, 1, 1, 1, 1, 1, 1, 1, 119 1, 1, 1, 1, 1, 1, 1, 1, 120 1, 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 1, 0, 3, 0, 3, 0 122}; 123 124int 125lexi(void) 126{ 127 int unary_delim; /* this is set to 1 if the current token 128 * forces a following operator to be unary */ 129 static int last_code; /* the last token type returned */ 130 static int l_struct; /* set to 1 if the last token was 'struct' */ 131 int code; /* internal code to be returned */ 132 char qchar; /* the delimiter character for a string */ 133 134 e_token = s_token; /* point to start of place to save token */ 135 unary_delim = false; 136 ps.col_1 = ps.last_nl; /* tell world that this token started in 137 * column 1 iff the last thing scanned was nl */ 138 ps.last_nl = false; 139 140 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 141 ps.col_1 = false; /* leading blanks imply token is not in column 142 * 1 */ 143 if (++buf_ptr >= buf_end) 144 fill_buffer(); 145 } 146 147 /* Scan an alphanumeric token */ 148 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 149 /* 150 * we have a character or number 151 */ 152 const char *j; /* used for searching thru list of 153 * 154 * reserved words */ 155 struct templ *p; 156 157 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 158 int seendot = 0, 159 seenexp = 0, 160 seensfx = 0; 161 if (*buf_ptr == '0' && 162 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 163 *e_token++ = *buf_ptr++; 164 *e_token++ = *buf_ptr++; 165 while (isxdigit(*buf_ptr)) { 166 CHECK_SIZE_TOKEN; 167 *e_token++ = *buf_ptr++; 168 } 169 } 170 else 171 while (1) { 172 if (*buf_ptr == '.') { 173 if (seendot) 174 break; 175 else 176 seendot++; 177 } 178 CHECK_SIZE_TOKEN; 179 *e_token++ = *buf_ptr++; 180 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 181 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 182 break; 183 else { 184 seenexp++; 185 seendot++; 186 CHECK_SIZE_TOKEN; 187 *e_token++ = *buf_ptr++; 188 if (*buf_ptr == '+' || *buf_ptr == '-') 189 *e_token++ = *buf_ptr++; 190 } 191 } 192 } 193 while (1) { 194 if (!(seensfx & 1) && 195 (*buf_ptr == 'U' || *buf_ptr == 'u')) { 196 CHECK_SIZE_TOKEN; 197 *e_token++ = *buf_ptr++; 198 seensfx |= 1; 199 continue; 200 } 201 if (!(seensfx & 2) && 202 (*buf_ptr == 'L' || *buf_ptr == 'l')) { 203 CHECK_SIZE_TOKEN; 204 if (buf_ptr[1] == buf_ptr[0]) 205 *e_token++ = *buf_ptr++; 206 *e_token++ = *buf_ptr++; 207 seensfx |= 2; 208 continue; 209 } 210 break; 211 } 212 } 213 else 214 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 215 /* fill_buffer() terminates buffer with newline */ 216 if (*buf_ptr == BACKSLASH) { 217 if (*(buf_ptr + 1) == '\n') { 218 buf_ptr += 2; 219 if (buf_ptr >= buf_end) 220 fill_buffer(); 221 } else 222 break; 223 } 224 CHECK_SIZE_TOKEN; 225 /* copy it over */ 226 *e_token++ = *buf_ptr++; 227 if (buf_ptr >= buf_end) 228 fill_buffer(); 229 } 230 *e_token++ = '\0'; 231 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 232 if (++buf_ptr >= buf_end) 233 fill_buffer(); 234 } 235 ps.its_a_keyword = false; 236 ps.sizeof_keyword = false; 237 if (l_struct) { /* if last token was 'struct', then this token 238 * should be treated as a declaration */ 239 l_struct = false; 240 last_code = ident; 241 ps.last_u_d = true; 242 return (decl); 243 } 244 ps.last_u_d = false; /* Operator after identifier is binary */ 245 last_code = ident; /* Remember that this is the code we will 246 * return */ 247 248 /* 249 * This loop will check if the token is a keyword. 250 */ 251 for (p = specials; (j = p->rwd) != 0; p++) { 252 const char *q = s_token; /* point at scanned token */ 253 if (*j++ != *q++ || *j++ != *q++) 254 continue; /* This test depends on the fact that 255 * identifiers are always at least 1 character 256 * long (ie. the first two bytes of the 257 * identifier are always meaningful) */ 258 if (q[-1] == 0) 259 break; /* If its a one-character identifier */ 260 while (*q++ == *j) 261 if (*j++ == 0) 262 goto found_keyword; /* I wish that C had a multi-level 263 * break... */ 264 } 265 if (p->rwd) { /* we have a keyword */ 266 found_keyword: 267 ps.its_a_keyword = true; 268 ps.last_u_d = true; 269 switch (p->rwcode) { 270 case 1: /* it is a switch */ 271 return (swstmt); 272 case 2: /* a case or default */ 273 return (casestmt); 274 275 case 3: /* a "struct" */ 276 if (ps.p_l_follow)
| 97 {0, 0} 98}; 99 100char chartype[128] = 101{ /* this is used to facilitate the decision of 102 * what type (alphanumeric, operator) each 103 * character is */ 104 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0, 0, 0, 0, 0, 106 0, 0, 0, 0, 0, 0, 0, 0, 107 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 3, 0, 0, 1, 3, 3, 0, 109 0, 0, 3, 3, 0, 3, 0, 3, 110 1, 1, 1, 1, 1, 1, 1, 1, 111 1, 1, 0, 0, 3, 3, 3, 3, 112 0, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 1, 1, 1, 1, 1, 1, 114 1, 1, 1, 1, 1, 1, 1, 1, 115 1, 1, 1, 0, 0, 0, 3, 1, 116 0, 1, 1, 1, 1, 1, 1, 1, 117 1, 1, 1, 1, 1, 1, 1, 1, 118 1, 1, 1, 1, 1, 1, 1, 1, 119 1, 1, 1, 0, 3, 0, 3, 0 120}; 121 122int 123lexi(void) 124{ 125 int unary_delim; /* this is set to 1 if the current token 126 * forces a following operator to be unary */ 127 static int last_code; /* the last token type returned */ 128 static int l_struct; /* set to 1 if the last token was 'struct' */ 129 int code; /* internal code to be returned */ 130 char qchar; /* the delimiter character for a string */ 131 132 e_token = s_token; /* point to start of place to save token */ 133 unary_delim = false; 134 ps.col_1 = ps.last_nl; /* tell world that this token started in 135 * column 1 iff the last thing scanned was nl */ 136 ps.last_nl = false; 137 138 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 139 ps.col_1 = false; /* leading blanks imply token is not in column 140 * 1 */ 141 if (++buf_ptr >= buf_end) 142 fill_buffer(); 143 } 144 145 /* Scan an alphanumeric token */ 146 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 147 /* 148 * we have a character or number 149 */ 150 const char *j; /* used for searching thru list of 151 * 152 * reserved words */ 153 struct templ *p; 154 155 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 156 int seendot = 0, 157 seenexp = 0, 158 seensfx = 0; 159 if (*buf_ptr == '0' && 160 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 161 *e_token++ = *buf_ptr++; 162 *e_token++ = *buf_ptr++; 163 while (isxdigit(*buf_ptr)) { 164 CHECK_SIZE_TOKEN; 165 *e_token++ = *buf_ptr++; 166 } 167 } 168 else 169 while (1) { 170 if (*buf_ptr == '.') { 171 if (seendot) 172 break; 173 else 174 seendot++; 175 } 176 CHECK_SIZE_TOKEN; 177 *e_token++ = *buf_ptr++; 178 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 179 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 180 break; 181 else { 182 seenexp++; 183 seendot++; 184 CHECK_SIZE_TOKEN; 185 *e_token++ = *buf_ptr++; 186 if (*buf_ptr == '+' || *buf_ptr == '-') 187 *e_token++ = *buf_ptr++; 188 } 189 } 190 } 191 while (1) { 192 if (!(seensfx & 1) && 193 (*buf_ptr == 'U' || *buf_ptr == 'u')) { 194 CHECK_SIZE_TOKEN; 195 *e_token++ = *buf_ptr++; 196 seensfx |= 1; 197 continue; 198 } 199 if (!(seensfx & 2) && 200 (*buf_ptr == 'L' || *buf_ptr == 'l')) { 201 CHECK_SIZE_TOKEN; 202 if (buf_ptr[1] == buf_ptr[0]) 203 *e_token++ = *buf_ptr++; 204 *e_token++ = *buf_ptr++; 205 seensfx |= 2; 206 continue; 207 } 208 break; 209 } 210 } 211 else 212 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 213 /* fill_buffer() terminates buffer with newline */ 214 if (*buf_ptr == BACKSLASH) { 215 if (*(buf_ptr + 1) == '\n') { 216 buf_ptr += 2; 217 if (buf_ptr >= buf_end) 218 fill_buffer(); 219 } else 220 break; 221 } 222 CHECK_SIZE_TOKEN; 223 /* copy it over */ 224 *e_token++ = *buf_ptr++; 225 if (buf_ptr >= buf_end) 226 fill_buffer(); 227 } 228 *e_token++ = '\0'; 229 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 230 if (++buf_ptr >= buf_end) 231 fill_buffer(); 232 } 233 ps.its_a_keyword = false; 234 ps.sizeof_keyword = false; 235 if (l_struct) { /* if last token was 'struct', then this token 236 * should be treated as a declaration */ 237 l_struct = false; 238 last_code = ident; 239 ps.last_u_d = true; 240 return (decl); 241 } 242 ps.last_u_d = false; /* Operator after identifier is binary */ 243 last_code = ident; /* Remember that this is the code we will 244 * return */ 245 246 /* 247 * This loop will check if the token is a keyword. 248 */ 249 for (p = specials; (j = p->rwd) != 0; p++) { 250 const char *q = s_token; /* point at scanned token */ 251 if (*j++ != *q++ || *j++ != *q++) 252 continue; /* This test depends on the fact that 253 * identifiers are always at least 1 character 254 * long (ie. the first two bytes of the 255 * identifier are always meaningful) */ 256 if (q[-1] == 0) 257 break; /* If its a one-character identifier */ 258 while (*q++ == *j) 259 if (*j++ == 0) 260 goto found_keyword; /* I wish that C had a multi-level 261 * break... */ 262 } 263 if (p->rwd) { /* we have a keyword */ 264 found_keyword: 265 ps.its_a_keyword = true; 266 ps.last_u_d = true; 267 switch (p->rwcode) { 268 case 1: /* it is a switch */ 269 return (swstmt); 270 case 2: /* a case or default */ 271 return (casestmt); 272 273 case 3: /* a "struct" */ 274 if (ps.p_l_follow)
|
277 break; /* inside parens: cast */ 278 /* 279 * Next time around, we may want to know that we have had a 280 * 'struct' 281 */
| 275 break; /* inside parens: cast */
|
282 l_struct = true; 283 284 /*
| 276 l_struct = true; 277 278 /*
|
285 * Fall through to test for a cast, function prototype or 286 * sizeof().
| 279 * Next time around, we will want to know that we have had a 280 * 'struct'
|
287 */ 288 case 4: /* one of the declaration keywords */ 289 if (ps.p_l_follow) { 290 ps.cast_mask |= 1 << ps.p_l_follow;
| 281 */ 282 case 4: /* one of the declaration keywords */ 283 if (ps.p_l_follow) { 284 ps.cast_mask |= 1 << ps.p_l_follow;
|
291 292 /* 293 * Forget that we saw `struct' if we're in a sizeof(). 294 */ 295 if (ps.sizeof_mask) 296 l_struct = false; 297 298 break; /* inside parens: cast, prototype or sizeof() */
| 285 break; /* inside parens: cast */
|
299 } 300 last_code = decl; 301 return (decl); 302 303 case 5: /* if, while, for */ 304 return (sp_paren); 305 306 case 6: /* do, else */ 307 return (sp_nparen); 308 309 case 7: 310 ps.sizeof_keyword = true; 311 default: /* all others are treated like any other 312 * identifier */ 313 return (ident); 314 } /* end of switch */ 315 } /* end of if (found_it) */ 316 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 317 char *tp = buf_ptr; 318 while (tp < buf_end) 319 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 320 goto not_proc; 321 strncpy(ps.procname, token, sizeof ps.procname - 1); 322 ps.in_parameter_declaration = 1; 323 rparen_count = 1; 324 not_proc:; 325 } 326 /* 327 * The following hack attempts to guess whether or not the current 328 * token is in fact a declaration keyword -- one that has been 329 * typedefd 330 */ 331 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 332 && !ps.p_l_follow 333 && !ps.block_init 334 && (ps.last_token == rparen || ps.last_token == semicolon || 335 ps.last_token == decl || 336 ps.last_token == lbrace || ps.last_token == rbrace)) { 337 ps.its_a_keyword = true; 338 ps.last_u_d = true; 339 last_code = decl; 340 return decl; 341 } 342 if (last_code == decl) /* if this is a declared variable, then 343 * following sign is unary */ 344 ps.last_u_d = true; /* will make "int a -1" work */ 345 last_code = ident; 346 return (ident); /* the ident is not in the list */ 347 } /* end of procesing for alpanum character */ 348 349 /* Scan a non-alphanumeric token */ 350 351 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 352 * moved here */ 353 *e_token = '\0'; 354 if (++buf_ptr >= buf_end) 355 fill_buffer(); 356 357 switch (*token) { 358 case '\n': 359 unary_delim = ps.last_u_d; 360 ps.last_nl = true; /* remember that we just had a newline */ 361 code = (had_eof ? 0 : newline); 362 363 /* 364 * if data has been exhausted, the newline is a dummy, and we should 365 * return code to stop 366 */ 367 break; 368 369 case '\'': /* start of quoted character */ 370 case '"': /* start of string */ 371 qchar = *token; 372 if (troff) { 373 e_token[-1] = '`'; 374 if (qchar == '"') 375 *e_token++ = '`'; 376 e_token = chfont(&bodyf, &stringf, e_token); 377 } 378 do { /* copy the string */ 379 while (1) { /* move one character or [/<char>]<char> */ 380 if (*buf_ptr == '\n') { 381 printf("%d: Unterminated literal\n", line_no); 382 goto stop_lit; 383 } 384 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 385 * since CHECK_SIZE guarantees that there 386 * are at least 5 entries left */ 387 *e_token = *buf_ptr++; 388 if (buf_ptr >= buf_end) 389 fill_buffer(); 390 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 391 if (*buf_ptr == '\n') /* check for escaped newline */ 392 ++line_no; 393 if (troff) { 394 *++e_token = BACKSLASH; 395 if (*buf_ptr == BACKSLASH) 396 *++e_token = BACKSLASH; 397 } 398 *++e_token = *buf_ptr++; 399 ++e_token; /* we must increment this again because we 400 * copied two chars */ 401 if (buf_ptr >= buf_end) 402 fill_buffer(); 403 } 404 else 405 break; /* we copied one character */ 406 } /* end of while (1) */ 407 } while (*e_token++ != qchar); 408 if (troff) { 409 e_token = chfont(&stringf, &bodyf, e_token - 1); 410 if (qchar == '"') 411 *e_token++ = '\''; 412 } 413stop_lit: 414 code = ident; 415 break; 416 417 case ('('): 418 case ('['): 419 unary_delim = true; 420 code = lparen; 421 break; 422 423 case (')'): 424 case (']'): 425 code = rparen; 426 break; 427 428 case '#': 429 unary_delim = ps.last_u_d; 430 code = preesc; 431 break; 432 433 case '?': 434 unary_delim = true; 435 code = question; 436 break; 437 438 case (':'): 439 code = colon; 440 unary_delim = true; 441 break; 442 443 case (';'): 444 unary_delim = true; 445 code = semicolon; 446 break; 447 448 case ('{'): 449 unary_delim = true; 450 451 /* 452 * if (ps.in_or_st) ps.block_init = 1; 453 */ 454 /* ? code = ps.block_init ? lparen : lbrace; */ 455 code = lbrace; 456 break; 457 458 case ('}'): 459 unary_delim = true; 460 /* ? code = ps.block_init ? rparen : rbrace; */ 461 code = rbrace; 462 break; 463 464 case 014: /* a form feed */ 465 unary_delim = ps.last_u_d; 466 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 467 * right */ 468 code = form_feed; 469 break; 470 471 case (','): 472 unary_delim = true; 473 code = comma; 474 break; 475 476 case '.': 477 unary_delim = false; 478 code = period; 479 break; 480 481 case '-': 482 case '+': /* check for -, +, --, ++ */ 483 code = (ps.last_u_d ? unary_op : binary_op); 484 unary_delim = true; 485 486 if (*buf_ptr == token[0]) { 487 /* check for doubled character */ 488 *e_token++ = *buf_ptr++; 489 /* buffer overflow will be checked at end of loop */ 490 if (last_code == ident || last_code == rparen) { 491 code = (ps.last_u_d ? unary_op : postop); 492 /* check for following ++ or -- */ 493 unary_delim = false; 494 } 495 } 496 else if (*buf_ptr == '=') 497 /* check for operator += */ 498 *e_token++ = *buf_ptr++; 499 else if (*buf_ptr == '>') { 500 /* check for operator -> */ 501 *e_token++ = *buf_ptr++; 502 if (!pointer_as_binop) { 503 unary_delim = false; 504 code = unary_op; 505 ps.want_blank = false; 506 } 507 } 508 break; /* buffer overflow will be checked at end of 509 * switch */ 510 511 case '=': 512 if (ps.in_or_st) 513 ps.block_init = 1; 514#ifdef undef 515 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 516 e_token[-1] = *buf_ptr++; 517 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 518 *e_token++ = *buf_ptr++; 519 *e_token++ = '='; /* Flip =+ to += */ 520 *e_token = 0; 521 } 522#else 523 if (*buf_ptr == '=') {/* == */ 524 *e_token++ = '='; /* Flip =+ to += */ 525 buf_ptr++; 526 *e_token = 0; 527 } 528#endif 529 code = binary_op; 530 unary_delim = true; 531 break; 532 /* can drop thru!!! */ 533 534 case '>': 535 case '<': 536 case '!': /* ops like <, <<, <=, !=, etc */ 537 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 538 *e_token++ = *buf_ptr; 539 if (++buf_ptr >= buf_end) 540 fill_buffer(); 541 } 542 if (*buf_ptr == '=') 543 *e_token++ = *buf_ptr++; 544 code = (ps.last_u_d ? unary_op : binary_op); 545 unary_delim = true; 546 break; 547 548 default: 549 if (token[0] == '/' && *buf_ptr == '*') { 550 /* it is start of comment */ 551 *e_token++ = '*'; 552 553 if (++buf_ptr >= buf_end) 554 fill_buffer(); 555 556 code = comment; 557 unary_delim = ps.last_u_d; 558 break; 559 } 560 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 561 /* 562 * handle ||, &&, etc, and also things as in int *****i 563 */ 564 *e_token++ = *buf_ptr; 565 if (++buf_ptr >= buf_end) 566 fill_buffer(); 567 } 568 code = (ps.last_u_d ? unary_op : binary_op); 569 unary_delim = true; 570 571 572 } /* end of switch */ 573 if (code != newline) { 574 l_struct = false; 575 last_code = code; 576 } 577 if (buf_ptr >= buf_end) /* check for input buffer empty */ 578 fill_buffer(); 579 ps.last_u_d = unary_delim; 580 *e_token = '\0'; /* null terminate the token */ 581 return (code); 582} 583 584/* 585 * Add the given keyword to the keyword table, using val as the keyword type 586 */ 587void 588addkey(char *key, int val) 589{ 590 struct templ *p = specials; 591 while (p->rwd) 592 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 593 return; 594 else 595 p++; 596 if (p >= specials + sizeof specials / sizeof specials[0]) 597 return; /* For now, table overflows are silently 598 * ignored */ 599 p->rwd = key; 600 p->rwcode = val; 601 p[1].rwd = 0; 602 p[1].rwcode = 0; 603}
| 286 } 287 last_code = decl; 288 return (decl); 289 290 case 5: /* if, while, for */ 291 return (sp_paren); 292 293 case 6: /* do, else */ 294 return (sp_nparen); 295 296 case 7: 297 ps.sizeof_keyword = true; 298 default: /* all others are treated like any other 299 * identifier */ 300 return (ident); 301 } /* end of switch */ 302 } /* end of if (found_it) */ 303 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 304 char *tp = buf_ptr; 305 while (tp < buf_end) 306 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 307 goto not_proc; 308 strncpy(ps.procname, token, sizeof ps.procname - 1); 309 ps.in_parameter_declaration = 1; 310 rparen_count = 1; 311 not_proc:; 312 } 313 /* 314 * The following hack attempts to guess whether or not the current 315 * token is in fact a declaration keyword -- one that has been 316 * typedefd 317 */ 318 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 319 && !ps.p_l_follow 320 && !ps.block_init 321 && (ps.last_token == rparen || ps.last_token == semicolon || 322 ps.last_token == decl || 323 ps.last_token == lbrace || ps.last_token == rbrace)) { 324 ps.its_a_keyword = true; 325 ps.last_u_d = true; 326 last_code = decl; 327 return decl; 328 } 329 if (last_code == decl) /* if this is a declared variable, then 330 * following sign is unary */ 331 ps.last_u_d = true; /* will make "int a -1" work */ 332 last_code = ident; 333 return (ident); /* the ident is not in the list */ 334 } /* end of procesing for alpanum character */ 335 336 /* Scan a non-alphanumeric token */ 337 338 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 339 * moved here */ 340 *e_token = '\0'; 341 if (++buf_ptr >= buf_end) 342 fill_buffer(); 343 344 switch (*token) { 345 case '\n': 346 unary_delim = ps.last_u_d; 347 ps.last_nl = true; /* remember that we just had a newline */ 348 code = (had_eof ? 0 : newline); 349 350 /* 351 * if data has been exhausted, the newline is a dummy, and we should 352 * return code to stop 353 */ 354 break; 355 356 case '\'': /* start of quoted character */ 357 case '"': /* start of string */ 358 qchar = *token; 359 if (troff) { 360 e_token[-1] = '`'; 361 if (qchar == '"') 362 *e_token++ = '`'; 363 e_token = chfont(&bodyf, &stringf, e_token); 364 } 365 do { /* copy the string */ 366 while (1) { /* move one character or [/<char>]<char> */ 367 if (*buf_ptr == '\n') { 368 printf("%d: Unterminated literal\n", line_no); 369 goto stop_lit; 370 } 371 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 372 * since CHECK_SIZE guarantees that there 373 * are at least 5 entries left */ 374 *e_token = *buf_ptr++; 375 if (buf_ptr >= buf_end) 376 fill_buffer(); 377 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 378 if (*buf_ptr == '\n') /* check for escaped newline */ 379 ++line_no; 380 if (troff) { 381 *++e_token = BACKSLASH; 382 if (*buf_ptr == BACKSLASH) 383 *++e_token = BACKSLASH; 384 } 385 *++e_token = *buf_ptr++; 386 ++e_token; /* we must increment this again because we 387 * copied two chars */ 388 if (buf_ptr >= buf_end) 389 fill_buffer(); 390 } 391 else 392 break; /* we copied one character */ 393 } /* end of while (1) */ 394 } while (*e_token++ != qchar); 395 if (troff) { 396 e_token = chfont(&stringf, &bodyf, e_token - 1); 397 if (qchar == '"') 398 *e_token++ = '\''; 399 } 400stop_lit: 401 code = ident; 402 break; 403 404 case ('('): 405 case ('['): 406 unary_delim = true; 407 code = lparen; 408 break; 409 410 case (')'): 411 case (']'): 412 code = rparen; 413 break; 414 415 case '#': 416 unary_delim = ps.last_u_d; 417 code = preesc; 418 break; 419 420 case '?': 421 unary_delim = true; 422 code = question; 423 break; 424 425 case (':'): 426 code = colon; 427 unary_delim = true; 428 break; 429 430 case (';'): 431 unary_delim = true; 432 code = semicolon; 433 break; 434 435 case ('{'): 436 unary_delim = true; 437 438 /* 439 * if (ps.in_or_st) ps.block_init = 1; 440 */ 441 /* ? code = ps.block_init ? lparen : lbrace; */ 442 code = lbrace; 443 break; 444 445 case ('}'): 446 unary_delim = true; 447 /* ? code = ps.block_init ? rparen : rbrace; */ 448 code = rbrace; 449 break; 450 451 case 014: /* a form feed */ 452 unary_delim = ps.last_u_d; 453 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 454 * right */ 455 code = form_feed; 456 break; 457 458 case (','): 459 unary_delim = true; 460 code = comma; 461 break; 462 463 case '.': 464 unary_delim = false; 465 code = period; 466 break; 467 468 case '-': 469 case '+': /* check for -, +, --, ++ */ 470 code = (ps.last_u_d ? unary_op : binary_op); 471 unary_delim = true; 472 473 if (*buf_ptr == token[0]) { 474 /* check for doubled character */ 475 *e_token++ = *buf_ptr++; 476 /* buffer overflow will be checked at end of loop */ 477 if (last_code == ident || last_code == rparen) { 478 code = (ps.last_u_d ? unary_op : postop); 479 /* check for following ++ or -- */ 480 unary_delim = false; 481 } 482 } 483 else if (*buf_ptr == '=') 484 /* check for operator += */ 485 *e_token++ = *buf_ptr++; 486 else if (*buf_ptr == '>') { 487 /* check for operator -> */ 488 *e_token++ = *buf_ptr++; 489 if (!pointer_as_binop) { 490 unary_delim = false; 491 code = unary_op; 492 ps.want_blank = false; 493 } 494 } 495 break; /* buffer overflow will be checked at end of 496 * switch */ 497 498 case '=': 499 if (ps.in_or_st) 500 ps.block_init = 1; 501#ifdef undef 502 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 503 e_token[-1] = *buf_ptr++; 504 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 505 *e_token++ = *buf_ptr++; 506 *e_token++ = '='; /* Flip =+ to += */ 507 *e_token = 0; 508 } 509#else 510 if (*buf_ptr == '=') {/* == */ 511 *e_token++ = '='; /* Flip =+ to += */ 512 buf_ptr++; 513 *e_token = 0; 514 } 515#endif 516 code = binary_op; 517 unary_delim = true; 518 break; 519 /* can drop thru!!! */ 520 521 case '>': 522 case '<': 523 case '!': /* ops like <, <<, <=, !=, etc */ 524 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 525 *e_token++ = *buf_ptr; 526 if (++buf_ptr >= buf_end) 527 fill_buffer(); 528 } 529 if (*buf_ptr == '=') 530 *e_token++ = *buf_ptr++; 531 code = (ps.last_u_d ? unary_op : binary_op); 532 unary_delim = true; 533 break; 534 535 default: 536 if (token[0] == '/' && *buf_ptr == '*') { 537 /* it is start of comment */ 538 *e_token++ = '*'; 539 540 if (++buf_ptr >= buf_end) 541 fill_buffer(); 542 543 code = comment; 544 unary_delim = ps.last_u_d; 545 break; 546 } 547 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 548 /* 549 * handle ||, &&, etc, and also things as in int *****i 550 */ 551 *e_token++ = *buf_ptr; 552 if (++buf_ptr >= buf_end) 553 fill_buffer(); 554 } 555 code = (ps.last_u_d ? unary_op : binary_op); 556 unary_delim = true; 557 558 559 } /* end of switch */ 560 if (code != newline) { 561 l_struct = false; 562 last_code = code; 563 } 564 if (buf_ptr >= buf_end) /* check for input buffer empty */ 565 fill_buffer(); 566 ps.last_u_d = unary_delim; 567 *e_token = '\0'; /* null terminate the token */ 568 return (code); 569} 570 571/* 572 * Add the given keyword to the keyword table, using val as the keyword type 573 */ 574void 575addkey(char *key, int val) 576{ 577 struct templ *p = specials; 578 while (p->rwd) 579 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 580 return; 581 else 582 p++; 583 if (p >= specials + sizeof specials / sizeof specials[0]) 584 return; /* For now, table overflows are silently 585 * ignored */ 586 p->rwd = key; 587 p->rwcode = val; 588 p[1].rwd = 0; 589 p[1].rwcode = 0; 590}
|