C.c revision 91189
1/* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34#if 0 35#ifndef lint 36static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 37#endif 38#endif 39 40#include <sys/cdefs.h> 41__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 91189 2002-02-24 03:02:52Z gshapiro $"); 42 43#include <limits.h> 44#include <stdio.h> 45 46#include "ctags.h" 47 48static int func_entry __P((void)); 49static void hash_entry __P((void)); 50static void skip_string __P((int)); 51static int str_entry __P((int)); 52 53/* 54 * c_entries -- 55 * read .c and .h files and call appropriate routines 56 */ 57void 58c_entries() 59{ 60 int c; /* current character */ 61 int level; /* brace level */ 62 int token; /* if reading a token */ 63 int t_def; /* if reading a typedef */ 64 int t_level; /* typedef's brace level */ 65 char *sp; /* buffer pointer */ 66 char tok[MAXTOKEN]; /* token buffer */ 67 68 lineftell = ftell(inf); 69 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 70 while (GETC(!=, EOF)) { 71 switch (c) { 72 /* 73 * Here's where it DOESN'T handle: { 74 * foo(a) 75 * { 76 * #ifdef notdef 77 * } 78 * #endif 79 * if (a) 80 * puts("hello, world"); 81 * } 82 */ 83 case '{': 84 ++level; 85 goto endtok; 86 case '}': 87 /* 88 * if level goes below zero, try and fix 89 * it, even though we've already messed up 90 */ 91 if (--level < 0) 92 level = 0; 93 goto endtok; 94 95 case '\n': 96 SETLINE; 97 /* 98 * the above 3 cases are similar in that they 99 * are special characters that also end tokens. 100 */ 101 endtok: if (sp > tok) { 102 *sp = EOS; 103 token = YES; 104 sp = tok; 105 } 106 else 107 token = NO; 108 continue; 109 110 /* 111 * We ignore quoted strings and character constants 112 * completely. 113 */ 114 case '"': 115 case '\'': 116 (void)skip_string(c); 117 break; 118 119 /* 120 * comments can be fun; note the state is unchanged after 121 * return, in case we found: 122 * "foo() XX comment XX { int bar; }" 123 */ 124 case '/': 125 if (GETC(==, '*') || c == '/') { 126 skip_comment(c); 127 continue; 128 } 129 (void)ungetc(c, inf); 130 c = '/'; 131 goto storec; 132 133 /* hash marks flag #define's. */ 134 case '#': 135 if (sp == tok) { 136 hash_entry(); 137 break; 138 } 139 goto storec; 140 141 /* 142 * if we have a current token, parenthesis on 143 * level zero indicates a function. 144 */ 145 case '(': 146 if (!level && token) { 147 int curline; 148 149 if (sp != tok) 150 *sp = EOS; 151 /* 152 * grab the line immediately, we may 153 * already be wrong, for example, 154 * foo\n 155 * (arg1, 156 */ 157 getline(); 158 curline = lineno; 159 if (func_entry()) { 160 ++level; 161 pfnote(tok, curline); 162 } 163 break; 164 } 165 goto storec; 166 167 /* 168 * semi-colons indicate the end of a typedef; if we find a 169 * typedef we search for the next semi-colon of the same 170 * level as the typedef. Ignoring "structs", they are 171 * tricky, since you can find: 172 * 173 * "typedef long time_t;" 174 * "typedef unsigned int u_int;" 175 * "typedef unsigned int u_int [10];" 176 * 177 * If looking at a typedef, we save a copy of the last token 178 * found. Then, when we find the ';' we take the current 179 * token if it starts with a valid token name, else we take 180 * the one we saved. There's probably some reasonable 181 * alternative to this... 182 */ 183 case ';': 184 if (t_def && level == t_level) { 185 t_def = NO; 186 getline(); 187 if (sp != tok) 188 *sp = EOS; 189 pfnote(tok, lineno); 190 break; 191 } 192 goto storec; 193 194 /* 195 * store characters until one that can't be part of a token 196 * comes along; check the current token against certain 197 * reserved words. 198 */ 199 default: 200 /* ignore whitespace */ 201 if (c == ' ' || c == '\t') { 202 int save = c; 203 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 204 ; 205 if (c == EOF) 206 return; 207 (void)ungetc(c, inf); 208 c = save; 209 } 210 storec: if (!intoken(c)) { 211 if (sp == tok) 212 break; 213 *sp = EOS; 214 if (tflag) { 215 /* no typedefs inside typedefs */ 216 if (!t_def && 217 !memcmp(tok, "typedef",8)) { 218 t_def = YES; 219 t_level = level; 220 break; 221 } 222 /* catch "typedef struct" */ 223 if ((!t_def || t_level < level) 224 && (!memcmp(tok, "struct", 7) 225 || !memcmp(tok, "union", 6) 226 || !memcmp(tok, "enum", 5))) { 227 /* 228 * get line immediately; 229 * may change before '{' 230 */ 231 getline(); 232 if (str_entry(c)) 233 ++level; 234 break; 235 /* } */ 236 } 237 } 238 sp = tok; 239 } 240 else if (sp != tok || begtoken(c)) { 241 *sp++ = c; 242 token = YES; 243 } 244 continue; 245 } 246 247 sp = tok; 248 token = NO; 249 } 250} 251 252/* 253 * func_entry -- 254 * handle a function reference 255 */ 256static int 257func_entry() 258{ 259 int c; /* current character */ 260 int level = 0; /* for matching '()' */ 261 262 /* 263 * Find the end of the assumed function declaration. 264 * Note that ANSI C functions can have type definitions so keep 265 * track of the parentheses nesting level. 266 */ 267 while (GETC(!=, EOF)) { 268 switch (c) { 269 case '\'': 270 case '"': 271 /* skip strings and character constants */ 272 skip_string(c); 273 break; 274 case '/': 275 /* skip comments */ 276 if (GETC(==, '*') || c == '/') 277 skip_comment(c); 278 break; 279 case '(': 280 level++; 281 break; 282 case ')': 283 if (level == 0) 284 goto fnd; 285 level--; 286 break; 287 case '\n': 288 SETLINE; 289 } 290 } 291 return (NO); 292fnd: 293 /* 294 * we assume that the character after a function's right paren 295 * is a token character if it's a function and a non-token 296 * character if it's a declaration. Comments don't count... 297 */ 298 for (;;) { 299 while (GETC(!=, EOF) && iswhite(c)) 300 if (c == '\n') 301 SETLINE; 302 if (intoken(c) || c == '{') 303 break; 304 if (c == '/' && (GETC(==, '*') || c == '/')) 305 skip_comment(c); 306 else { /* don't ever "read" '/' */ 307 (void)ungetc(c, inf); 308 return (NO); 309 } 310 } 311 if (c != '{') 312 (void)skip_key('{'); 313 return (YES); 314} 315 316/* 317 * hash_entry -- 318 * handle a line starting with a '#' 319 */ 320static void 321hash_entry() 322{ 323 int c; /* character read */ 324 int curline; /* line started on */ 325 char *sp; /* buffer pointer */ 326 char tok[MAXTOKEN]; /* storage buffer */ 327 328 /* ignore leading whitespace */ 329 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 330 ; 331 (void)ungetc(c, inf); 332 333 curline = lineno; 334 for (sp = tok;;) { /* get next token */ 335 if (GETC(==, EOF)) 336 return; 337 if (iswhite(c)) 338 break; 339 *sp++ = c; 340 } 341 *sp = EOS; 342 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 343 goto skip; 344 for (;;) { /* this doesn't handle "#define \n" */ 345 if (GETC(==, EOF)) 346 return; 347 if (!iswhite(c)) 348 break; 349 } 350 for (sp = tok;;) { /* get next token */ 351 *sp++ = c; 352 if (GETC(==, EOF)) 353 return; 354 /* 355 * this is where it DOESN'T handle 356 * "#define \n" 357 */ 358 if (!intoken(c)) 359 break; 360 } 361 *sp = EOS; 362 if (dflag || c == '(') { /* only want macros */ 363 getline(); 364 pfnote(tok, curline); 365 } 366skip: if (c == '\n') { /* get rid of rest of define */ 367 SETLINE 368 if (*(sp - 1) != '\\') 369 return; 370 } 371 (void)skip_key('\n'); 372} 373 374/* 375 * str_entry -- 376 * handle a struct, union or enum entry 377 */ 378static int 379str_entry(c) 380 int c; /* current character */ 381{ 382 int curline; /* line started on */ 383 char *sp; /* buffer pointer */ 384 char tok[LINE_MAX]; /* storage buffer */ 385 386 curline = lineno; 387 while (iswhite(c)) 388 if (GETC(==, EOF)) 389 return (NO); 390 if (c == '{') /* it was "struct {" */ 391 return (YES); 392 for (sp = tok;;) { /* get next token */ 393 *sp++ = c; 394 if (GETC(==, EOF)) 395 return (NO); 396 if (!intoken(c)) 397 break; 398 } 399 switch (c) { 400 case '{': /* it was "struct foo{" */ 401 --sp; 402 break; 403 case '\n': /* it was "struct foo\n" */ 404 SETLINE; 405 /*FALLTHROUGH*/ 406 default: /* probably "struct foo " */ 407 while (GETC(!=, EOF)) 408 if (!iswhite(c)) 409 break; 410 if (c != '{') { 411 (void)ungetc(c, inf); 412 return (NO); 413 } 414 } 415 *sp = EOS; 416 pfnote(tok, curline); 417 return (YES); 418} 419 420/* 421 * skip_comment -- 422 * skip over comment 423 */ 424void 425skip_comment(t) 426 int t; /* comment character */ 427{ 428 int c; /* character read */ 429 int star; /* '*' flag */ 430 431 for (star = 0; GETC(!=, EOF);) 432 switch(c) { 433 /* comments don't nest, nor can they be escaped. */ 434 case '*': 435 star = YES; 436 break; 437 case '/': 438 if (star && t == '*') 439 return; 440 break; 441 case '\n': 442 if (t == '/') 443 return; 444 SETLINE; 445 /*FALLTHROUGH*/ 446 default: 447 star = NO; 448 break; 449 } 450} 451 452/* 453 * skip_string -- 454 * skip to the end of a string or character constant. 455 */ 456void 457skip_string(key) 458 int key; 459{ 460 int c, 461 skip; 462 463 for (skip = NO; GETC(!=, EOF); ) 464 switch (c) { 465 case '\\': /* a backslash escapes anything */ 466 skip = !skip; /* we toggle in case it's "\\" */ 467 break; 468 case '\n': 469 SETLINE; 470 /*FALLTHROUGH*/ 471 default: 472 if (c == key && !skip) 473 return; 474 skip = NO; 475 } 476} 477 478/* 479 * skip_key -- 480 * skip to next char "key" 481 */ 482int 483skip_key(key) 484 int key; 485{ 486 int c, 487 skip, 488 retval; 489 490 for (skip = retval = NO; GETC(!=, EOF);) 491 switch(c) { 492 case '\\': /* a backslash escapes anything */ 493 skip = !skip; /* we toggle in case it's "\\" */ 494 break; 495 case ';': /* special case for yacc; if one */ 496 case '|': /* of these chars occurs, we may */ 497 retval = YES; /* have moved out of the rule */ 498 break; /* not used by C */ 499 case '\'': 500 case '"': 501 /* skip strings and character constants */ 502 skip_string(c); 503 break; 504 case '/': 505 /* skip comments */ 506 if (GETC(==, '*') || c == '/') { 507 skip_comment(c); 508 break; 509 } 510 (void)ungetc(c, inf); 511 c = '/'; 512 goto norm; 513 case '\n': 514 SETLINE; 515 /*FALLTHROUGH*/ 516 default: 517 norm: 518 if (c == key && !skip) 519 return (retval); 520 skip = NO; 521 } 522 return (retval); 523} 524