Cross Reference: /freebsd-10.0-release/usr.bin/indent/lexi.c

Deleted Added

sdiff udiff text old ( 116390 ) new ( 125618 )

full compact

lexi.c (116390)	lexi.c (125618)
1/* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 / 35 36#if 0 37#ifndef lint 38static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39#endif / not lint */ 40#endif 41#include <sys/cdefs.h>	1/* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 / 35 36#if 0 37#ifndef lint 38static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 39#endif / not lint */ 40#endif 41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 116390 2003-06-15 09:28:17Z charnier $");	42__FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 125618 2004-02-09 12:52:15Z bde $");
43 44/* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 / 49 50#include <err.h> 51#include <stdio.h> 52#include <ctype.h> 53#include <stdlib.h> 54#include <string.h> 55#include "indent_globs.h" 56#include "indent_codes.h" 57#include "indent.h" 58 59#define alphanum 1 60#define opchar 3 61 62struct templ { 63 const char rwd; 64 int rwcode; 65}; 66	43 44/* 45 * Here we have the token scanner for indent. It scans off one token and puts 46 * it in the global variable "token". It returns a code, indicating the type 47 * of token scanned. 48 / 49 50#include <err.h> 51#include <stdio.h> 52#include <ctype.h> 53#include <stdlib.h> 54#include <string.h> 55#include "indent_globs.h" 56#include "indent_codes.h" 57#include "indent.h" 58 59#define alphanum 1 60#define opchar 3 61 62struct templ { 63 const char rwd; 64 int rwcode; 65}; 66
67struct templ specials[1000] =	67struct templ specials[100] =
68{ 69 {"switch", 1}, 70 {"case", 2}, 71 {"break", 0}, 72 {"struct", 3}, 73 {"union", 3}, 74 {"enum", 3}, 75 {"default", 2}, 76 {"int", 4}, 77 {"char", 4}, 78 {"float", 4}, 79 {"double", 4}, 80 {"long", 4}, 81 {"short", 4}, 82 {"typdef", 4}, 83 {"unsigned", 4}, 84 {"register", 4}, 85 {"static", 4}, 86 {"global", 4}, 87 {"extern", 4}, 88 {"void", 4}, 89 {"goto", 0}, 90 {"return", 0}, 91 {"if", 5}, 92 {"while", 5}, 93 {"for", 5}, 94 {"else", 6}, 95 {"do", 6}, 96 {"sizeof", 7},	68{ 69 {"switch", 1}, 70 {"case", 2}, 71 {"break", 0}, 72 {"struct", 3}, 73 {"union", 3}, 74 {"enum", 3}, 75 {"default", 2}, 76 {"int", 4}, 77 {"char", 4}, 78 {"float", 4}, 79 {"double", 4}, 80 {"long", 4}, 81 {"short", 4}, 82 {"typdef", 4}, 83 {"unsigned", 4}, 84 {"register", 4}, 85 {"static", 4}, 86 {"global", 4}, 87 {"extern", 4}, 88 {"void", 4}, 89 {"goto", 0}, 90 {"return", 0}, 91 {"if", 5}, 92 {"while", 5}, 93 {"for", 5}, 94 {"else", 6}, 95 {"do", 6}, 96 {"sizeof", 7},
97 {"const", 9}, 98 {"volatile", 9},
99 {0, 0} 100}; 101 102char chartype[128] = 103{ /* this is used to facilitate the decision of 104 * what type (alphanumeric, operator) each 105 * character is / 106* 0, 0, 0, 0, 0, 0, 0, 0, 107 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 0, 0, 0, 0, 0, 0, 0, 109 0, 0, 0, 0, 0, 0, 0, 0, 110 0, 3, 0, 0, 1, 3, 3, 0, 111 0, 0, 3, 3, 0, 3, 0, 3, 112 1, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 0, 0, 3, 3, 3, 3, 114 0, 1, 1, 1, 1, 1, 1, 1, 115 1, 1, 1, 1, 1, 1, 1, 1, 116 1, 1, 1, 1, 1, 1, 1, 1, 117 1, 1, 1, 0, 0, 0, 3, 1, 118 0, 1, 1, 1, 1, 1, 1, 1, 119 1, 1, 1, 1, 1, 1, 1, 1, 120 1, 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 1, 0, 3, 0, 3, 0 122}; 123 124int 125lexi(void) 126{ 127 int unary_delim; /* this is set to 1 if the current token 128 * forces a following operator to be unary / 129* static int last_code; /* the last token type returned / 130* static int l_struct; /* set to 1 if the last token was 'struct' / 131* int code; /* internal code to be returned / 132* char qchar; /* the delimiter character for a string / 133* 134 e_token = s_token; /* point to start of place to save token / 135* unary_delim = false; 136 ps.col_1 = ps.last_nl; /* tell world that this token started in 137 * column 1 iff the last thing scanned was nl / 138* ps.last_nl = false; 139 140 while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks / 141* ps.col_1 = false; /* leading blanks imply token is not in column 142 * 1 / 143* if (++buf_ptr >= buf_end) 144 fill_buffer(); 145 } 146 147 /* Scan an alphanumeric token / 148* if (chartype[(int)buf_ptr] == alphanum \|\| (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 149* /* 150 * we have a character or number 151 / 152* const char j; / used for searching thru list of 153 * 154 * reserved words / 155* struct templ p; 156* 157 if (isdigit(buf_ptr) \|\| (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 158* int seendot = 0, 159 seenexp = 0, 160 seensfx = 0; 161 if (buf_ptr == '0' && 162* (buf_ptr[1] == 'x' \|\| buf_ptr[1] == 'X')) { 163 e_token++ = buf_ptr++; 164 e_token++ = buf_ptr++; 165 while (isxdigit(buf_ptr)) { 166* CHECK_SIZE_TOKEN; 167 e_token++ = buf_ptr++; 168 } 169 } 170 else 171 while (1) { 172 if (buf_ptr == '.') { 173* if (seendot) 174 break; 175 else 176 seendot++; 177 } 178 CHECK_SIZE_TOKEN; 179 e_token++ = buf_ptr++; 180 if (!isdigit(buf_ptr) && buf_ptr != '.') { 181 if ((buf_ptr != 'E' && buf_ptr != 'e') \|\| seenexp) 182 break; 183 else { 184 seenexp++; 185 seendot++; 186 CHECK_SIZE_TOKEN; 187 e_token++ = buf_ptr++; 188 if (buf_ptr == '+' \|\| buf_ptr == '-') 189 e_token++ = buf_ptr++; 190 } 191 } 192 } 193 while (1) { 194 if (!(seensfx & 1) && 195 (buf_ptr == 'U' \|\| buf_ptr == 'u')) { 196 CHECK_SIZE_TOKEN; 197 e_token++ = buf_ptr++; 198 seensfx \|= 1; 199 continue; 200 } 201 if (!(seensfx & 2) && 202 (buf_ptr == 'L' \|\| buf_ptr == 'l')) { 203 CHECK_SIZE_TOKEN; 204 if (buf_ptr[1] == buf_ptr[0]) 205 e_token++ = buf_ptr++; 206 e_token++ = buf_ptr++; 207 seensfx \|= 2; 208 continue; 209 } 210 break; 211 } 212 } 213 else 214 while (chartype[(int)buf_ptr] == alphanum \|\| buf_ptr == BACKSLASH) { 215 /* fill_buffer() terminates buffer with newline / 216* if (buf_ptr == BACKSLASH) { 217* if ((buf_ptr + 1) == '\n') { 218* buf_ptr += 2; 219 if (buf_ptr >= buf_end) 220 fill_buffer(); 221 } else 222 break; 223 } 224 CHECK_SIZE_TOKEN; 225 /* copy it over / 226* e_token++ = buf_ptr++; 227 if (buf_ptr >= buf_end) 228 fill_buffer(); 229 } 230 e_token++ = '\0'; 231* while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks / 232* if (++buf_ptr >= buf_end) 233 fill_buffer(); 234 } 235 ps.its_a_keyword = false; 236 ps.sizeof_keyword = false; 237 if (l_struct) { /* if last token was 'struct', then this token 238 * should be treated as a declaration / 239* l_struct = false; 240 last_code = ident; 241 ps.last_u_d = true; 242 return (decl); 243 } 244 ps.last_u_d = false; /* Operator after identifier is binary / 245* last_code = ident; /* Remember that this is the code we will 246 * return / 247* 248 /* 249 * This loop will check if the token is a keyword. 250 / 251* for (p = specials; (j = p->rwd) != 0; p++) { 252 const char q = s_token; / point at scanned token / 253* if (j++ != q++ \|\| j++ != q++) 254 continue; /* This test depends on the fact that 255 * identifiers are always at least 1 character 256 * long (ie. the first two bytes of the 257 * identifier are always meaningful) / 258* if (q[-1] == 0) 259 break; /* If its a one-character identifier / 260* while (q++ == j) 261 if (j++ == 0) 262* goto found_keyword; /* I wish that C had a multi-level 263 * break... / 264* } 265 if (p->rwd) { /* we have a keyword / 266* found_keyword: 267 ps.its_a_keyword = true; 268 ps.last_u_d = true; 269 switch (p->rwcode) { 270 case 1: /* it is a switch / 271* return (swstmt); 272 case 2: /* a case or default / 273* return (casestmt); 274 275 case 3: /* a "struct" / 276* if (ps.p_l_follow)	97 {0, 0} 98}; 99 100char chartype[128] = 101{ /* this is used to facilitate the decision of 102 * what type (alphanumeric, operator) each 103 * character is / 104* 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0, 0, 0, 0, 0, 106 0, 0, 0, 0, 0, 0, 0, 0, 107 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 3, 0, 0, 1, 3, 3, 0, 109 0, 0, 3, 3, 0, 3, 0, 3, 110 1, 1, 1, 1, 1, 1, 1, 1, 111 1, 1, 0, 0, 3, 3, 3, 3, 112 0, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 1, 1, 1, 1, 1, 1, 114 1, 1, 1, 1, 1, 1, 1, 1, 115 1, 1, 1, 0, 0, 0, 3, 1, 116 0, 1, 1, 1, 1, 1, 1, 1, 117 1, 1, 1, 1, 1, 1, 1, 1, 118 1, 1, 1, 1, 1, 1, 1, 1, 119 1, 1, 1, 0, 3, 0, 3, 0 120}; 121 122int 123lexi(void) 124{ 125 int unary_delim; /* this is set to 1 if the current token 126 * forces a following operator to be unary / 127* static int last_code; /* the last token type returned / 128* static int l_struct; /* set to 1 if the last token was 'struct' / 129* int code; /* internal code to be returned / 130* char qchar; /* the delimiter character for a string / 131* 132 e_token = s_token; /* point to start of place to save token / 133* unary_delim = false; 134 ps.col_1 = ps.last_nl; /* tell world that this token started in 135 * column 1 iff the last thing scanned was nl / 136* ps.last_nl = false; 137 138 while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks / 139* ps.col_1 = false; /* leading blanks imply token is not in column 140 * 1 / 141* if (++buf_ptr >= buf_end) 142 fill_buffer(); 143 } 144 145 /* Scan an alphanumeric token / 146* if (chartype[(int)buf_ptr] == alphanum \|\| (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 147* /* 148 * we have a character or number 149 / 150* const char j; / used for searching thru list of 151 * 152 * reserved words / 153* struct templ p; 154* 155 if (isdigit(buf_ptr) \|\| (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 156* int seendot = 0, 157 seenexp = 0, 158 seensfx = 0; 159 if (buf_ptr == '0' && 160* (buf_ptr[1] == 'x' \|\| buf_ptr[1] == 'X')) { 161 e_token++ = buf_ptr++; 162 e_token++ = buf_ptr++; 163 while (isxdigit(buf_ptr)) { 164* CHECK_SIZE_TOKEN; 165 e_token++ = buf_ptr++; 166 } 167 } 168 else 169 while (1) { 170 if (buf_ptr == '.') { 171* if (seendot) 172 break; 173 else 174 seendot++; 175 } 176 CHECK_SIZE_TOKEN; 177 e_token++ = buf_ptr++; 178 if (!isdigit(buf_ptr) && buf_ptr != '.') { 179 if ((buf_ptr != 'E' && buf_ptr != 'e') \|\| seenexp) 180 break; 181 else { 182 seenexp++; 183 seendot++; 184 CHECK_SIZE_TOKEN; 185 e_token++ = buf_ptr++; 186 if (buf_ptr == '+' \|\| buf_ptr == '-') 187 e_token++ = buf_ptr++; 188 } 189 } 190 } 191 while (1) { 192 if (!(seensfx & 1) && 193 (buf_ptr == 'U' \|\| buf_ptr == 'u')) { 194 CHECK_SIZE_TOKEN; 195 e_token++ = buf_ptr++; 196 seensfx \|= 1; 197 continue; 198 } 199 if (!(seensfx & 2) && 200 (buf_ptr == 'L' \|\| buf_ptr == 'l')) { 201 CHECK_SIZE_TOKEN; 202 if (buf_ptr[1] == buf_ptr[0]) 203 e_token++ = buf_ptr++; 204 e_token++ = buf_ptr++; 205 seensfx \|= 2; 206 continue; 207 } 208 break; 209 } 210 } 211 else 212 while (chartype[(int)buf_ptr] == alphanum \|\| buf_ptr == BACKSLASH) { 213 /* fill_buffer() terminates buffer with newline / 214* if (buf_ptr == BACKSLASH) { 215* if ((buf_ptr + 1) == '\n') { 216* buf_ptr += 2; 217 if (buf_ptr >= buf_end) 218 fill_buffer(); 219 } else 220 break; 221 } 222 CHECK_SIZE_TOKEN; 223 /* copy it over / 224* e_token++ = buf_ptr++; 225 if (buf_ptr >= buf_end) 226 fill_buffer(); 227 } 228 e_token++ = '\0'; 229* while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks / 230* if (++buf_ptr >= buf_end) 231 fill_buffer(); 232 } 233 ps.its_a_keyword = false; 234 ps.sizeof_keyword = false; 235 if (l_struct) { /* if last token was 'struct', then this token 236 * should be treated as a declaration / 237* l_struct = false; 238 last_code = ident; 239 ps.last_u_d = true; 240 return (decl); 241 } 242 ps.last_u_d = false; /* Operator after identifier is binary / 243* last_code = ident; /* Remember that this is the code we will 244 * return / 245* 246 /* 247 * This loop will check if the token is a keyword. 248 / 249* for (p = specials; (j = p->rwd) != 0; p++) { 250 const char q = s_token; / point at scanned token / 251* if (j++ != q++ \|\| j++ != q++) 252 continue; /* This test depends on the fact that 253 * identifiers are always at least 1 character 254 * long (ie. the first two bytes of the 255 * identifier are always meaningful) / 256* if (q[-1] == 0) 257 break; /* If its a one-character identifier / 258* while (q++ == j) 259 if (j++ == 0) 260* goto found_keyword; /* I wish that C had a multi-level 261 * break... / 262* } 263 if (p->rwd) { /* we have a keyword / 264* found_keyword: 265 ps.its_a_keyword = true; 266 ps.last_u_d = true; 267 switch (p->rwcode) { 268 case 1: /* it is a switch / 269* return (swstmt); 270 case 2: /* a case or default / 271* return (casestmt); 272 273 case 3: /* a "struct" / 274* if (ps.p_l_follow)
277 break; /* inside parens: cast / 278* /* 279 * Next time around, we may want to know that we have had a 280 * 'struct' 281 */	275 break; /* inside parens: cast */
282 l_struct = true; 283 284 /*	276 l_struct = true; 277 278 /*
285 * Fall through to test for a cast, function prototype or 286 * sizeof().	279 * Next time around, we will want to know that we have had a 280 * 'struct'
287 / 288* case 4: /* one of the declaration keywords / 289* if (ps.p_l_follow) { 290 ps.cast_mask \|= 1 << ps.p_l_follow;	281 / 282* case 4: /* one of the declaration keywords / 283* if (ps.p_l_follow) { 284 ps.cast_mask \|= 1 << ps.p_l_follow;
291 292 /* 293 * Forget that we saw `struct' if we're in a sizeof(). 294 / 295* if (ps.sizeof_mask) 296 l_struct = false; 297 298 break; /* inside parens: cast, prototype or sizeof() */	285 break; /* inside parens: cast */
299 } 300 last_code = decl; 301 return (decl); 302 303 case 5: /* if, while, for / 304* return (sp_paren); 305 306 case 6: /* do, else / 307* return (sp_nparen); 308 309 case 7: 310 ps.sizeof_keyword = true; 311 default: /* all others are treated like any other 312 * identifier / 313* return (ident); 314 } /* end of switch / 315* } /* end of if (found_it) / 316* if (buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 317* char tp = buf_ptr; 318* while (tp < buf_end) 319 if (tp++ == ')' && (tp == ';' \|\| tp == ',')) 320* goto not_proc; 321 strncpy(ps.procname, token, sizeof ps.procname - 1); 322 ps.in_parameter_declaration = 1; 323 rparen_count = 1; 324 not_proc:; 325 } 326 /* 327 * The following hack attempts to guess whether or not the current 328 * token is in fact a declaration keyword -- one that has been 329 * typedefd 330 / 331* if (((buf_ptr == '' && buf_ptr[1] != '=') \|\| isalpha(buf_ptr) \|\| buf_ptr == '_') 332 && !ps.p_l_follow 333 && !ps.block_init 334 && (ps.last_token == rparen \|\| ps.last_token == semicolon \|\| 335 ps.last_token == decl \|\| 336 ps.last_token == lbrace \|\| ps.last_token == rbrace)) { 337 ps.its_a_keyword = true; 338 ps.last_u_d = true; 339 last_code = decl; 340 return decl; 341 } 342 if (last_code == decl) /* if this is a declared variable, then 343 * following sign is unary / 344* ps.last_u_d = true; /* will make "int a -1" work / 345* last_code = ident; 346 return (ident); /* the ident is not in the list / 347* } /* end of procesing for alpanum character / 348* 349 /* Scan a non-alphanumeric token / 350* 351 e_token++ = buf_ptr; /* if it is only a one-character token, it is 352 * moved here / 353* e_token = '\0'; 354* if (++buf_ptr >= buf_end) 355 fill_buffer(); 356 357 switch (token) { 358* case '\n': 359 unary_delim = ps.last_u_d; 360 ps.last_nl = true; /* remember that we just had a newline / 361* code = (had_eof ? 0 : newline); 362 363 /* 364 * if data has been exhausted, the newline is a dummy, and we should 365 * return code to stop 366 / 367* break; 368 369 case '\'': /* start of quoted character / 370* case '"': /* start of string / 371* qchar = token; 372* if (troff) { 373 e_token[-1] = '`'; 374 if (qchar == '"') 375 e_token++ = '`'; 376* e_token = chfont(&bodyf, &stringf, e_token); 377 } 378 do { /* copy the string / 379* while (1) { /* move one character or [/<char>]<char> / 380* if (buf_ptr == '\n') { 381* printf("%d: Unterminated literal\n", line_no); 382 goto stop_lit; 383 } 384 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 385 * since CHECK_SIZE guarantees that there 386 * are at least 5 entries left / 387* e_token = buf_ptr++; 388 if (buf_ptr >= buf_end) 389 fill_buffer(); 390 if (e_token == BACKSLASH) { / if escape, copy extra char / 391* if (buf_ptr == '\n') / check for escaped newline / 392* ++line_no; 393 if (troff) { 394 ++e_token = BACKSLASH; 395* if (buf_ptr == BACKSLASH) 396* ++e_token = BACKSLASH; 397* } 398 ++e_token = buf_ptr++; 399 ++e_token; /* we must increment this again because we 400 * copied two chars / 401* if (buf_ptr >= buf_end) 402 fill_buffer(); 403 } 404 else 405 break; /* we copied one character / 406* } /* end of while (1) / 407* } while (e_token++ != qchar); 408* if (troff) { 409 e_token = chfont(&stringf, &bodyf, e_token - 1); 410 if (qchar == '"') 411 e_token++ = '\''; 412* } 413stop_lit: 414 code = ident; 415 break; 416 417 case ('('): 418 case ('['): 419 unary_delim = true; 420 code = lparen; 421 break; 422 423 case (')'): 424 case (']'): 425 code = rparen; 426 break; 427 428 case '#': 429 unary_delim = ps.last_u_d; 430 code = preesc; 431 break; 432 433 case '?': 434 unary_delim = true; 435 code = question; 436 break; 437 438 case (':'): 439 code = colon; 440 unary_delim = true; 441 break; 442 443 case (';'): 444 unary_delim = true; 445 code = semicolon; 446 break; 447 448 case ('{'): 449 unary_delim = true; 450 451 /* 452 * if (ps.in_or_st) ps.block_init = 1; 453 / 454* /* ? code = ps.block_init ? lparen : lbrace; / 455* code = lbrace; 456 break; 457 458 case ('}'): 459 unary_delim = true; 460 /* ? code = ps.block_init ? rparen : rbrace; / 461* code = rbrace; 462 break; 463 464 case 014: /* a form feed / 465* unary_delim = ps.last_u_d; 466 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 467 * right / 468* code = form_feed; 469 break; 470 471 case (','): 472 unary_delim = true; 473 code = comma; 474 break; 475 476 case '.': 477 unary_delim = false; 478 code = period; 479 break; 480 481 case '-': 482 case '+': /* check for -, +, --, ++ / 483* code = (ps.last_u_d ? unary_op : binary_op); 484 unary_delim = true; 485 486 if (buf_ptr == token[0]) { 487* /* check for doubled character / 488* e_token++ = buf_ptr++; 489 /* buffer overflow will be checked at end of loop / 490* if (last_code == ident \|\| last_code == rparen) { 491 code = (ps.last_u_d ? unary_op : postop); 492 /* check for following ++ or -- / 493* unary_delim = false; 494 } 495 } 496 else if (buf_ptr == '=') 497* /* check for operator += / 498* e_token++ = buf_ptr++; 499 else if (buf_ptr == '>') { 500* /* check for operator -> / 501* e_token++ = buf_ptr++; 502 if (!pointer_as_binop) { 503 unary_delim = false; 504 code = unary_op; 505 ps.want_blank = false; 506 } 507 } 508 break; /* buffer overflow will be checked at end of 509 * switch / 510* 511 case '=': 512 if (ps.in_or_st) 513 ps.block_init = 1; 514#ifdef undef 515 if (chartype[buf_ptr] == opchar) { / we have two char assignment / 516* e_token[-1] = buf_ptr++; 517* if ((e_token[-1] == '<' \|\| e_token[-1] == '>') && e_token[-1] == buf_ptr) 518* e_token++ = buf_ptr++; 519 e_token++ = '='; / Flip =+ to += / 520* e_token = 0; 521* } 522#else 523 if (buf_ptr == '=') {/ == / 524* e_token++ = '='; / Flip =+ to += / 525* buf_ptr++; 526 e_token = 0; 527* } 528#endif 529 code = binary_op; 530 unary_delim = true; 531 break; 532 /* can drop thru!!! / 533* 534 case '>': 535 case '<': 536 case '!': /* ops like <, <<, <=, !=, etc / 537* if (buf_ptr == '>' \|\| buf_ptr == '<' \|\| buf_ptr == '=') { 538* e_token++ = buf_ptr; 539 if (++buf_ptr >= buf_end) 540 fill_buffer(); 541 } 542 if (buf_ptr == '=') 543* e_token++ = buf_ptr++; 544 code = (ps.last_u_d ? unary_op : binary_op); 545 unary_delim = true; 546 break; 547 548 default: 549 if (token[0] == '/' && buf_ptr == '') { 550 /* it is start of comment / 551* e_token++ = ''; 552 553 if (++buf_ptr >= buf_end) 554 fill_buffer(); 555 556 code = comment; 557 unary_delim = ps.last_u_d; 558 break; 559 } 560 while ((e_token - 1) == buf_ptr \|\| buf_ptr == '=') { 561* /* 562 * handle \|\|, &&, etc, and also things as in int ****i 563* / 564* e_token++ = buf_ptr; 565 if (++buf_ptr >= buf_end) 566 fill_buffer(); 567 } 568 code = (ps.last_u_d ? unary_op : binary_op); 569 unary_delim = true; 570 571 572 } /* end of switch / 573* if (code != newline) { 574 l_struct = false; 575 last_code = code; 576 } 577 if (buf_ptr >= buf_end) /* check for input buffer empty / 578* fill_buffer(); 579 ps.last_u_d = unary_delim; 580 e_token = '\0'; / null terminate the token / 581* return (code); 582} 583 584/* 585 * Add the given keyword to the keyword table, using val as the keyword type 586 / 587void 588addkey(char key, int val) 589{ 590 struct templ p = specials; 591* while (p->rwd) 592 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 593 return; 594 else 595 p++; 596 if (p >= specials + sizeof specials / sizeof specials[0]) 597 return; /* For now, table overflows are silently 598 * ignored / 599* p->rwd = key; 600 p->rwcode = val; 601 p[1].rwd = 0; 602 p[1].rwcode = 0; 603}	286 } 287 last_code = decl; 288 return (decl); 289 290 case 5: /* if, while, for / 291* return (sp_paren); 292 293 case 6: /* do, else / 294* return (sp_nparen); 295 296 case 7: 297 ps.sizeof_keyword = true; 298 default: /* all others are treated like any other 299 * identifier / 300* return (ident); 301 } /* end of switch / 302* } /* end of if (found_it) / 303* if (buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 304* char tp = buf_ptr; 305* while (tp < buf_end) 306 if (tp++ == ')' && (tp == ';' \|\| tp == ',')) 307* goto not_proc; 308 strncpy(ps.procname, token, sizeof ps.procname - 1); 309 ps.in_parameter_declaration = 1; 310 rparen_count = 1; 311 not_proc:; 312 } 313 /* 314 * The following hack attempts to guess whether or not the current 315 * token is in fact a declaration keyword -- one that has been 316 * typedefd 317 / 318* if (((buf_ptr == '' && buf_ptr[1] != '=') \|\| isalpha(buf_ptr) \|\| buf_ptr == '_') 319 && !ps.p_l_follow 320 && !ps.block_init 321 && (ps.last_token == rparen \|\| ps.last_token == semicolon \|\| 322 ps.last_token == decl \|\| 323 ps.last_token == lbrace \|\| ps.last_token == rbrace)) { 324 ps.its_a_keyword = true; 325 ps.last_u_d = true; 326 last_code = decl; 327 return decl; 328 } 329 if (last_code == decl) /* if this is a declared variable, then 330 * following sign is unary / 331* ps.last_u_d = true; /* will make "int a -1" work / 332* last_code = ident; 333 return (ident); /* the ident is not in the list / 334* } /* end of procesing for alpanum character / 335* 336 /* Scan a non-alphanumeric token / 337* 338 e_token++ = buf_ptr; /* if it is only a one-character token, it is 339 * moved here / 340* e_token = '\0'; 341* if (++buf_ptr >= buf_end) 342 fill_buffer(); 343 344 switch (token) { 345* case '\n': 346 unary_delim = ps.last_u_d; 347 ps.last_nl = true; /* remember that we just had a newline / 348* code = (had_eof ? 0 : newline); 349 350 /* 351 * if data has been exhausted, the newline is a dummy, and we should 352 * return code to stop 353 / 354* break; 355 356 case '\'': /* start of quoted character / 357* case '"': /* start of string / 358* qchar = token; 359* if (troff) { 360 e_token[-1] = '`'; 361 if (qchar == '"') 362 e_token++ = '`'; 363* e_token = chfont(&bodyf, &stringf, e_token); 364 } 365 do { /* copy the string / 366* while (1) { /* move one character or [/<char>]<char> / 367* if (buf_ptr == '\n') { 368* printf("%d: Unterminated literal\n", line_no); 369 goto stop_lit; 370 } 371 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 372 * since CHECK_SIZE guarantees that there 373 * are at least 5 entries left / 374* e_token = buf_ptr++; 375 if (buf_ptr >= buf_end) 376 fill_buffer(); 377 if (e_token == BACKSLASH) { / if escape, copy extra char / 378* if (buf_ptr == '\n') / check for escaped newline / 379* ++line_no; 380 if (troff) { 381 ++e_token = BACKSLASH; 382* if (buf_ptr == BACKSLASH) 383* ++e_token = BACKSLASH; 384* } 385 ++e_token = buf_ptr++; 386 ++e_token; /* we must increment this again because we 387 * copied two chars / 388* if (buf_ptr >= buf_end) 389 fill_buffer(); 390 } 391 else 392 break; /* we copied one character / 393* } /* end of while (1) / 394* } while (e_token++ != qchar); 395* if (troff) { 396 e_token = chfont(&stringf, &bodyf, e_token - 1); 397 if (qchar == '"') 398 e_token++ = '\''; 399* } 400stop_lit: 401 code = ident; 402 break; 403 404 case ('('): 405 case ('['): 406 unary_delim = true; 407 code = lparen; 408 break; 409 410 case (')'): 411 case (']'): 412 code = rparen; 413 break; 414 415 case '#': 416 unary_delim = ps.last_u_d; 417 code = preesc; 418 break; 419 420 case '?': 421 unary_delim = true; 422 code = question; 423 break; 424 425 case (':'): 426 code = colon; 427 unary_delim = true; 428 break; 429 430 case (';'): 431 unary_delim = true; 432 code = semicolon; 433 break; 434 435 case ('{'): 436 unary_delim = true; 437 438 /* 439 * if (ps.in_or_st) ps.block_init = 1; 440 / 441* /* ? code = ps.block_init ? lparen : lbrace; / 442* code = lbrace; 443 break; 444 445 case ('}'): 446 unary_delim = true; 447 /* ? code = ps.block_init ? rparen : rbrace; / 448* code = rbrace; 449 break; 450 451 case 014: /* a form feed / 452* unary_delim = ps.last_u_d; 453 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 454 * right / 455* code = form_feed; 456 break; 457 458 case (','): 459 unary_delim = true; 460 code = comma; 461 break; 462 463 case '.': 464 unary_delim = false; 465 code = period; 466 break; 467 468 case '-': 469 case '+': /* check for -, +, --, ++ / 470* code = (ps.last_u_d ? unary_op : binary_op); 471 unary_delim = true; 472 473 if (buf_ptr == token[0]) { 474* /* check for doubled character / 475* e_token++ = buf_ptr++; 476 /* buffer overflow will be checked at end of loop / 477* if (last_code == ident \|\| last_code == rparen) { 478 code = (ps.last_u_d ? unary_op : postop); 479 /* check for following ++ or -- / 480* unary_delim = false; 481 } 482 } 483 else if (buf_ptr == '=') 484* /* check for operator += / 485* e_token++ = buf_ptr++; 486 else if (buf_ptr == '>') { 487* /* check for operator -> / 488* e_token++ = buf_ptr++; 489 if (!pointer_as_binop) { 490 unary_delim = false; 491 code = unary_op; 492 ps.want_blank = false; 493 } 494 } 495 break; /* buffer overflow will be checked at end of 496 * switch / 497* 498 case '=': 499 if (ps.in_or_st) 500 ps.block_init = 1; 501#ifdef undef 502 if (chartype[buf_ptr] == opchar) { / we have two char assignment / 503* e_token[-1] = buf_ptr++; 504* if ((e_token[-1] == '<' \|\| e_token[-1] == '>') && e_token[-1] == buf_ptr) 505* e_token++ = buf_ptr++; 506 e_token++ = '='; / Flip =+ to += / 507* e_token = 0; 508* } 509#else 510 if (buf_ptr == '=') {/ == / 511* e_token++ = '='; / Flip =+ to += / 512* buf_ptr++; 513 e_token = 0; 514* } 515#endif 516 code = binary_op; 517 unary_delim = true; 518 break; 519 /* can drop thru!!! / 520* 521 case '>': 522 case '<': 523 case '!': /* ops like <, <<, <=, !=, etc / 524* if (buf_ptr == '>' \|\| buf_ptr == '<' \|\| buf_ptr == '=') { 525* e_token++ = buf_ptr; 526 if (++buf_ptr >= buf_end) 527 fill_buffer(); 528 } 529 if (buf_ptr == '=') 530* e_token++ = buf_ptr++; 531 code = (ps.last_u_d ? unary_op : binary_op); 532 unary_delim = true; 533 break; 534 535 default: 536 if (token[0] == '/' && buf_ptr == '') { 537 /* it is start of comment / 538* e_token++ = ''; 539 540 if (++buf_ptr >= buf_end) 541 fill_buffer(); 542 543 code = comment; 544 unary_delim = ps.last_u_d; 545 break; 546 } 547 while ((e_token - 1) == buf_ptr \|\| buf_ptr == '=') { 548* /* 549 * handle \|\|, &&, etc, and also things as in int ****i 550* / 551* e_token++ = buf_ptr; 552 if (++buf_ptr >= buf_end) 553 fill_buffer(); 554 } 555 code = (ps.last_u_d ? unary_op : binary_op); 556 unary_delim = true; 557 558 559 } /* end of switch / 560* if (code != newline) { 561 l_struct = false; 562 last_code = code; 563 } 564 if (buf_ptr >= buf_end) /* check for input buffer empty / 565* fill_buffer(); 566 ps.last_u_d = unary_delim; 567 e_token = '\0'; / null terminate the token / 568* return (code); 569} 570 571/* 572 * Add the given keyword to the keyword table, using val as the keyword type 573 / 574void 575addkey(char key, int val) 576{ 577 struct templ p = specials; 578* while (p->rwd) 579 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 580 return; 581 else 582 p++; 583 if (p >= specials + sizeof specials / sizeof specials[0]) 584 return; /* For now, table overflows are silently 585 * ignored / 586* p->rwd = key; 587 p->rwcode = val; 588 p[1].rwd = 0; 589 p[1].rwcode = 0; 590}