C.c revision 302408
/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28 */ 29 30#if 0 31#ifndef lint 32static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 33#endif 34#endif 35 36#include <sys/cdefs.h> 37__FBSDID("$FreeBSD: stable/11/usr.bin/ctags/C.c 299355 2016-05-10 11:11:23Z bapt $"); 38 39#include <limits.h> 40#include <stdio.h> 41#include <string.h> 42 43#include "ctags.h" 44 45static int func_entry(void); 46static void hash_entry(void); 47static void skip_string(int); 48static int str_entry(int); 49 50/* 51 * c_entries -- 52 * read .c and .h files and call appropriate routines 53 */ 54void 55c_entries(void) 56{ 57 int c; /* current character */ 58 int level; /* brace level */ 59 int token; /* if reading a token */ 60 int t_def; /* if reading a typedef */ 61 int t_level; /* typedef's brace level */ 62 char *sp; /* buffer pointer */ 63 char tok[MAXTOKEN]; /* token buffer */ 64 65 lineftell = ftell(inf); 66 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 67 while (GETC(!=, EOF)) { 68 switch (c) { 69 /* 70 * Here's where it DOESN'T handle: { 71 * foo(a) 72 * { 73 * #ifdef notdef 74 * } 75 * #endif 76 * if (a) 77 * puts("hello, world"); 78 * } 79 */ 80 case '{': 81 ++level; 82 goto endtok; 83 case '}': 84 /* 85 * if level goes below zero, try and fix 86 * it, even though we've already messed up 87 */ 88 if (--level < 0) 89 level = 0; 90 goto endtok; 91 92 case '\n': 93 SETLINE; 94 /* 95 * the above 3 cases are similar in that they 96 * are special characters that also end tokens. 97 */ 98 endtok: if (sp > tok) { 99 *sp = EOS; 100 token = YES; 101 sp = tok; 102 } 103 else 104 token = NO; 105 continue; 106 107 /* 108 * We ignore quoted strings and character constants 109 * completely. 
110 */ 111 case '"': 112 case '\'': 113 skip_string(c); 114 break; 115 116 /* 117 * comments can be fun; note the state is unchanged after 118 * return, in case we found: 119 * "foo() XX comment XX { int bar; }" 120 */ 121 case '/': 122 if (GETC(==, '*') || c == '/') { 123 skip_comment(c); 124 continue; 125 } 126 (void)ungetc(c, inf); 127 c = '/'; 128 goto storec; 129 130 /* hash marks flag #define's. */ 131 case '#': 132 if (sp == tok) { 133 hash_entry(); 134 break; 135 } 136 goto storec; 137 138 /* 139 * if we have a current token, parenthesis on 140 * level zero indicates a function. 141 */ 142 case '(': 143 if (!level && token) { 144 int curline; 145 146 if (sp != tok) 147 *sp = EOS; 148 /* 149 * grab the line immediately, we may 150 * already be wrong, for example, 151 * foo\n 152 * (arg1, 153 */ 154 get_line(); 155 curline = lineno; 156 if (func_entry()) { 157 ++level; 158 pfnote(tok, curline); 159 } 160 break; 161 } 162 goto storec; 163 164 /* 165 * semi-colons indicate the end of a typedef; if we find a 166 * typedef we search for the next semi-colon of the same 167 * level as the typedef. Ignoring "structs", they are 168 * tricky, since you can find: 169 * 170 * "typedef long time_t;" 171 * "typedef unsigned int u_int;" 172 * "typedef unsigned int u_int [10];" 173 * 174 * If looking at a typedef, we save a copy of the last token 175 * found. Then, when we find the ';' we take the current 176 * token if it starts with a valid token name, else we take 177 * the one we saved. There's probably some reasonable 178 * alternative to this... 179 */ 180 case ';': 181 if (t_def && level == t_level) { 182 t_def = NO; 183 get_line(); 184 if (sp != tok) 185 *sp = EOS; 186 pfnote(tok, lineno); 187 break; 188 } 189 goto storec; 190 191 /* 192 * store characters until one that can't be part of a token 193 * comes along; check the current token against certain 194 * reserved words. 
195 */ 196 default: 197 /* ignore whitespace */ 198 if (c == ' ' || c == '\t') { 199 int save = c; 200 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 201 ; 202 if (c == EOF) 203 return; 204 (void)ungetc(c, inf); 205 c = save; 206 } 207 storec: if (!intoken(c)) { 208 if (sp == tok) 209 break; 210 *sp = EOS; 211 if (tflag) { 212 /* no typedefs inside typedefs */ 213 if (!t_def && 214 !memcmp(tok, "typedef",8)) { 215 t_def = YES; 216 t_level = level; 217 break; 218 } 219 /* catch "typedef struct" */ 220 if ((!t_def || t_level < level) 221 && (!memcmp(tok, "struct", 7) 222 || !memcmp(tok, "union", 6) 223 || !memcmp(tok, "enum", 5))) { 224 /* 225 * get line immediately; 226 * may change before '{' 227 */ 228 get_line(); 229 if (str_entry(c)) 230 ++level; 231 break; 232 /* } */ 233 } 234 } 235 sp = tok; 236 } 237 else if (sp != tok || begtoken(c)) { 238 if (sp == tok + sizeof tok - 1) 239 /* Too long -- truncate it */ 240 *sp = EOS; 241 else 242 *sp++ = c; 243 token = YES; 244 } 245 continue; 246 } 247 248 sp = tok; 249 token = NO; 250 } 251} 252 253/* 254 * func_entry -- 255 * handle a function reference 256 */ 257static int 258func_entry(void) 259{ 260 int c; /* current character */ 261 int level = 0; /* for matching '()' */ 262 263 /* 264 * Find the end of the assumed function declaration. 265 * Note that ANSI C functions can have type definitions so keep 266 * track of the parentheses nesting level. 
267 */ 268 while (GETC(!=, EOF)) { 269 switch (c) { 270 case '\'': 271 case '"': 272 /* skip strings and character constants */ 273 skip_string(c); 274 break; 275 case '/': 276 /* skip comments */ 277 if (GETC(==, '*') || c == '/') 278 skip_comment(c); 279 break; 280 case '(': 281 level++; 282 break; 283 case ')': 284 if (level == 0) 285 goto fnd; 286 level--; 287 break; 288 case '\n': 289 SETLINE; 290 } 291 } 292 return (NO); 293fnd: 294 /* 295 * we assume that the character after a function's right paren 296 * is a token character if it's a function and a non-token 297 * character if it's a declaration. Comments don't count... 298 */ 299 for (;;) { 300 while (GETC(!=, EOF) && iswhite(c)) 301 if (c == '\n') 302 SETLINE; 303 if (intoken(c) || c == '{') 304 break; 305 if (c == '/' && (GETC(==, '*') || c == '/')) 306 skip_comment(c); 307 else { /* don't ever "read" '/' */ 308 (void)ungetc(c, inf); 309 return (NO); 310 } 311 } 312 if (c != '{') 313 (void)skip_key('{'); 314 return (YES); 315} 316 317/* 318 * hash_entry -- 319 * handle a line starting with a '#' 320 */ 321static void 322hash_entry(void) 323{ 324 int c; /* character read */ 325 int curline; /* line started on */ 326 char *sp; /* buffer pointer */ 327 char tok[MAXTOKEN]; /* storage buffer */ 328 329 /* ignore leading whitespace */ 330 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 331 ; 332 (void)ungetc(c, inf); 333 334 curline = lineno; 335 for (sp = tok;;) { /* get next token */ 336 if (GETC(==, EOF)) 337 return; 338 if (iswhite(c)) 339 break; 340 if (sp == tok + sizeof tok - 1) 341 /* Too long -- truncate it */ 342 *sp = EOS; 343 else 344 *sp++ = c; 345 } 346 *sp = EOS; 347 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 348 goto skip; 349 for (;;) { /* this doesn't handle "#define \n" */ 350 if (GETC(==, EOF)) 351 return; 352 if (!iswhite(c)) 353 break; 354 } 355 for (sp = tok;;) { /* get next token */ 356 if (sp == tok + sizeof tok - 1) 357 /* Too long -- truncate it */ 358 *sp = 
EOS; 359 else 360 *sp++ = c; 361 if (GETC(==, EOF)) 362 return; 363 /* 364 * this is where it DOESN'T handle 365 * "#define \n" 366 */ 367 if (!intoken(c)) 368 break; 369 } 370 *sp = EOS; 371 if (dflag || c == '(') { /* only want macros */ 372 get_line(); 373 pfnote(tok, curline); 374 } 375skip: if (c == '\n') { /* get rid of rest of define */ 376 SETLINE 377 if (*(sp - 1) != '\\') 378 return; 379 } 380 (void)skip_key('\n'); 381} 382 383/* 384 * str_entry -- 385 * handle a struct, union or enum entry 386 */ 387static int 388str_entry(int c) /* c is current character */ 389{ 390 int curline; /* line started on */ 391 char *sp; /* buffer pointer */ 392 char tok[LINE_MAX]; /* storage buffer */ 393 394 curline = lineno; 395 while (iswhite(c)) 396 if (GETC(==, EOF)) 397 return (NO); 398 if (c == '{') /* it was "struct {" */ 399 return (YES); 400 for (sp = tok;;) { /* get next token */ 401 if (sp == tok + sizeof tok - 1) 402 /* Too long -- truncate it */ 403 *sp = EOS; 404 else 405 *sp++ = c; 406 if (GETC(==, EOF)) 407 return (NO); 408 if (!intoken(c)) 409 break; 410 } 411 switch (c) { 412 case '{': /* it was "struct foo{" */ 413 --sp; 414 break; 415 case '\n': /* it was "struct foo\n" */ 416 SETLINE; 417 /*FALLTHROUGH*/ 418 default: /* probably "struct foo " */ 419 while (GETC(!=, EOF)) 420 if (!iswhite(c)) 421 break; 422 if (c != '{') { 423 (void)ungetc(c, inf); 424 return (NO); 425 } 426 } 427 *sp = EOS; 428 pfnote(tok, curline); 429 return (YES); 430} 431 432/* 433 * skip_comment -- 434 * skip over comment 435 */ 436void 437skip_comment(int t) /* t is comment character */ 438{ 439 int c; /* character read */ 440 int star; /* '*' flag */ 441 442 for (star = 0; GETC(!=, EOF);) 443 switch(c) { 444 /* comments don't nest, nor can they be escaped. 
*/ 445 case '*': 446 star = YES; 447 break; 448 case '/': 449 if (star && t == '*') 450 return; 451 break; 452 case '\n': 453 if (t == '/') 454 return; 455 SETLINE; 456 /*FALLTHROUGH*/ 457 default: 458 star = NO; 459 break; 460 } 461} 462 463/* 464 * skip_string -- 465 * skip to the end of a string or character constant. 466 */ 467void 468skip_string(int key) 469{ 470 int c, 471 skip; 472 473 for (skip = NO; GETC(!=, EOF); ) 474 switch (c) { 475 case '\\': /* a backslash escapes anything */ 476 skip = !skip; /* we toggle in case it's "\\" */ 477 break; 478 case '\n': 479 SETLINE; 480 /*FALLTHROUGH*/ 481 default: 482 if (c == key && !skip) 483 return; 484 skip = NO; 485 } 486} 487 488/* 489 * skip_key -- 490 * skip to next char "key" 491 */ 492int 493skip_key(int key) 494{ 495 int c, 496 skip, 497 retval; 498 499 for (skip = retval = NO; GETC(!=, EOF);) 500 switch(c) { 501 case '\\': /* a backslash escapes anything */ 502 skip = !skip; /* we toggle in case it's "\\" */ 503 break; 504 case ';': /* special case for yacc; if one */ 505 case '|': /* of these chars occurs, we may */ 506 retval = YES; /* have moved out of the rule */ 507 break; /* not used by C */ 508 case '\'': 509 case '"': 510 /* skip strings and character constants */ 511 skip_string(c); 512 break; 513 case '/': 514 /* skip comments */ 515 if (GETC(==, '*') || c == '/') { 516 skip_comment(c); 517 break; 518 } 519 (void)ungetc(c, inf); 520 c = '/'; 521 goto norm; 522 case '\n': 523 SETLINE; 524 /*FALLTHROUGH*/ 525 default: 526 norm: 527 if (c == key && !skip) 528 return (retval); 529 skip = NO; 530 } 531 return (retval); 532} 533