/* C.c revision 100822 */
1/* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 */ 33 34#if 0 35#ifndef lint 36static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 37#endif 38#endif 39 40#include <sys/cdefs.h> 41__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 100822 2002-07-28 15:50:38Z dwmalone $"); 42 43#include <limits.h> 44#include <stdio.h> 45#include <string.h> 46 47#include "ctags.h" 48 49static int func_entry(void); 50static void hash_entry(void); 51static void skip_string(int); 52static int str_entry(int); 53 54/* 55 * c_entries -- 56 * read .c and .h files and call appropriate routines 57 */ 58void 59c_entries(void) 60{ 61 int c; /* current character */ 62 int level; /* brace level */ 63 int token; /* if reading a token */ 64 int t_def; /* if reading a typedef */ 65 int t_level; /* typedef's brace level */ 66 char *sp; /* buffer pointer */ 67 char tok[MAXTOKEN]; /* token buffer */ 68 69 lineftell = ftell(inf); 70 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 71 while (GETC(!=, EOF)) { 72 switch (c) { 73 /* 74 * Here's where it DOESN'T handle: { 75 * foo(a) 76 * { 77 * #ifdef notdef 78 * } 79 * #endif 80 * if (a) 81 * puts("hello, world"); 82 * } 83 */ 84 case '{': 85 ++level; 86 goto endtok; 87 case '}': 88 /* 89 * if level goes below zero, try and fix 90 * it, even though we've already messed up 91 */ 92 if (--level < 0) 93 level = 0; 94 goto endtok; 95 96 case '\n': 97 SETLINE; 98 /* 99 * the above 3 cases are similar in that they 100 * are special characters that also end tokens. 101 */ 102 endtok: if (sp > tok) { 103 *sp = EOS; 104 token = YES; 105 sp = tok; 106 } 107 else 108 token = NO; 109 continue; 110 111 /* 112 * We ignore quoted strings and character constants 113 * completely. 
114 */ 115 case '"': 116 case '\'': 117 (void)skip_string(c); 118 break; 119 120 /* 121 * comments can be fun; note the state is unchanged after 122 * return, in case we found: 123 * "foo() XX comment XX { int bar; }" 124 */ 125 case '/': 126 if (GETC(==, '*') || c == '/') { 127 skip_comment(c); 128 continue; 129 } 130 (void)ungetc(c, inf); 131 c = '/'; 132 goto storec; 133 134 /* hash marks flag #define's. */ 135 case '#': 136 if (sp == tok) { 137 hash_entry(); 138 break; 139 } 140 goto storec; 141 142 /* 143 * if we have a current token, parenthesis on 144 * level zero indicates a function. 145 */ 146 case '(': 147 if (!level && token) { 148 int curline; 149 150 if (sp != tok) 151 *sp = EOS; 152 /* 153 * grab the line immediately, we may 154 * already be wrong, for example, 155 * foo\n 156 * (arg1, 157 */ 158 getline(); 159 curline = lineno; 160 if (func_entry()) { 161 ++level; 162 pfnote(tok, curline); 163 } 164 break; 165 } 166 goto storec; 167 168 /* 169 * semi-colons indicate the end of a typedef; if we find a 170 * typedef we search for the next semi-colon of the same 171 * level as the typedef. Ignoring "structs", they are 172 * tricky, since you can find: 173 * 174 * "typedef long time_t;" 175 * "typedef unsigned int u_int;" 176 * "typedef unsigned int u_int [10];" 177 * 178 * If looking at a typedef, we save a copy of the last token 179 * found. Then, when we find the ';' we take the current 180 * token if it starts with a valid token name, else we take 181 * the one we saved. There's probably some reasonable 182 * alternative to this... 183 */ 184 case ';': 185 if (t_def && level == t_level) { 186 t_def = NO; 187 getline(); 188 if (sp != tok) 189 *sp = EOS; 190 pfnote(tok, lineno); 191 break; 192 } 193 goto storec; 194 195 /* 196 * store characters until one that can't be part of a token 197 * comes along; check the current token against certain 198 * reserved words. 
199 */ 200 default: 201 /* ignore whitespace */ 202 if (c == ' ' || c == '\t') { 203 int save = c; 204 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 205 ; 206 if (c == EOF) 207 return; 208 (void)ungetc(c, inf); 209 c = save; 210 } 211 storec: if (!intoken(c)) { 212 if (sp == tok) 213 break; 214 *sp = EOS; 215 if (tflag) { 216 /* no typedefs inside typedefs */ 217 if (!t_def && 218 !memcmp(tok, "typedef",8)) { 219 t_def = YES; 220 t_level = level; 221 break; 222 } 223 /* catch "typedef struct" */ 224 if ((!t_def || t_level < level) 225 && (!memcmp(tok, "struct", 7) 226 || !memcmp(tok, "union", 6) 227 || !memcmp(tok, "enum", 5))) { 228 /* 229 * get line immediately; 230 * may change before '{' 231 */ 232 getline(); 233 if (str_entry(c)) 234 ++level; 235 break; 236 /* } */ 237 } 238 } 239 sp = tok; 240 } 241 else if (sp != tok || begtoken(c)) { 242 if (sp == tok + sizeof tok - 1) 243 /* Too long -- truncate it */ 244 *sp = EOS; 245 else 246 *sp++ = c; 247 token = YES; 248 } 249 continue; 250 } 251 252 sp = tok; 253 token = NO; 254 } 255} 256 257/* 258 * func_entry -- 259 * handle a function reference 260 */ 261static int 262func_entry(void) 263{ 264 int c; /* current character */ 265 int level = 0; /* for matching '()' */ 266 267 /* 268 * Find the end of the assumed function declaration. 269 * Note that ANSI C functions can have type definitions so keep 270 * track of the parentheses nesting level. 
271 */ 272 while (GETC(!=, EOF)) { 273 switch (c) { 274 case '\'': 275 case '"': 276 /* skip strings and character constants */ 277 skip_string(c); 278 break; 279 case '/': 280 /* skip comments */ 281 if (GETC(==, '*') || c == '/') 282 skip_comment(c); 283 break; 284 case '(': 285 level++; 286 break; 287 case ')': 288 if (level == 0) 289 goto fnd; 290 level--; 291 break; 292 case '\n': 293 SETLINE; 294 } 295 } 296 return (NO); 297fnd: 298 /* 299 * we assume that the character after a function's right paren 300 * is a token character if it's a function and a non-token 301 * character if it's a declaration. Comments don't count... 302 */ 303 for (;;) { 304 while (GETC(!=, EOF) && iswhite(c)) 305 if (c == '\n') 306 SETLINE; 307 if (intoken(c) || c == '{') 308 break; 309 if (c == '/' && (GETC(==, '*') || c == '/')) 310 skip_comment(c); 311 else { /* don't ever "read" '/' */ 312 (void)ungetc(c, inf); 313 return (NO); 314 } 315 } 316 if (c != '{') 317 (void)skip_key('{'); 318 return (YES); 319} 320 321/* 322 * hash_entry -- 323 * handle a line starting with a '#' 324 */ 325static void 326hash_entry(void) 327{ 328 int c; /* character read */ 329 int curline; /* line started on */ 330 char *sp; /* buffer pointer */ 331 char tok[MAXTOKEN]; /* storage buffer */ 332 333 /* ignore leading whitespace */ 334 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 335 ; 336 (void)ungetc(c, inf); 337 338 curline = lineno; 339 for (sp = tok;;) { /* get next token */ 340 if (GETC(==, EOF)) 341 return; 342 if (iswhite(c)) 343 break; 344 if (sp == tok + sizeof tok - 1) 345 /* Too long -- truncate it */ 346 *sp = EOS; 347 else 348 *sp++ = c; 349 } 350 *sp = EOS; 351 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 352 goto skip; 353 for (;;) { /* this doesn't handle "#define \n" */ 354 if (GETC(==, EOF)) 355 return; 356 if (!iswhite(c)) 357 break; 358 } 359 for (sp = tok;;) { /* get next token */ 360 if (sp == tok + sizeof tok - 1) 361 /* Too long -- truncate it */ 362 *sp = 
EOS; 363 else 364 *sp++ = c; 365 if (GETC(==, EOF)) 366 return; 367 /* 368 * this is where it DOESN'T handle 369 * "#define \n" 370 */ 371 if (!intoken(c)) 372 break; 373 } 374 *sp = EOS; 375 if (dflag || c == '(') { /* only want macros */ 376 getline(); 377 pfnote(tok, curline); 378 } 379skip: if (c == '\n') { /* get rid of rest of define */ 380 SETLINE 381 if (*(sp - 1) != '\\') 382 return; 383 } 384 (void)skip_key('\n'); 385} 386 387/* 388 * str_entry -- 389 * handle a struct, union or enum entry 390 */ 391static int 392str_entry(int c) /* c is current character */ 393{ 394 int curline; /* line started on */ 395 char *sp; /* buffer pointer */ 396 char tok[LINE_MAX]; /* storage buffer */ 397 398 curline = lineno; 399 while (iswhite(c)) 400 if (GETC(==, EOF)) 401 return (NO); 402 if (c == '{') /* it was "struct {" */ 403 return (YES); 404 for (sp = tok;;) { /* get next token */ 405 if (sp == tok + sizeof tok - 1) 406 /* Too long -- truncate it */ 407 *sp = EOS; 408 else 409 *sp++ = c; 410 if (GETC(==, EOF)) 411 return (NO); 412 if (!intoken(c)) 413 break; 414 } 415 switch (c) { 416 case '{': /* it was "struct foo{" */ 417 --sp; 418 break; 419 case '\n': /* it was "struct foo\n" */ 420 SETLINE; 421 /*FALLTHROUGH*/ 422 default: /* probably "struct foo " */ 423 while (GETC(!=, EOF)) 424 if (!iswhite(c)) 425 break; 426 if (c != '{') { 427 (void)ungetc(c, inf); 428 return (NO); 429 } 430 } 431 *sp = EOS; 432 pfnote(tok, curline); 433 return (YES); 434} 435 436/* 437 * skip_comment -- 438 * skip over comment 439 */ 440void 441skip_comment(int t) /* t is comment character */ 442{ 443 int c; /* character read */ 444 int star; /* '*' flag */ 445 446 for (star = 0; GETC(!=, EOF);) 447 switch(c) { 448 /* comments don't nest, nor can they be escaped. 
*/ 449 case '*': 450 star = YES; 451 break; 452 case '/': 453 if (star && t == '*') 454 return; 455 break; 456 case '\n': 457 if (t == '/') 458 return; 459 SETLINE; 460 /*FALLTHROUGH*/ 461 default: 462 star = NO; 463 break; 464 } 465} 466 467/* 468 * skip_string -- 469 * skip to the end of a string or character constant. 470 */ 471void 472skip_string(int key) 473{ 474 int c, 475 skip; 476 477 for (skip = NO; GETC(!=, EOF); ) 478 switch (c) { 479 case '\\': /* a backslash escapes anything */ 480 skip = !skip; /* we toggle in case it's "\\" */ 481 break; 482 case '\n': 483 SETLINE; 484 /*FALLTHROUGH*/ 485 default: 486 if (c == key && !skip) 487 return; 488 skip = NO; 489 } 490} 491 492/* 493 * skip_key -- 494 * skip to next char "key" 495 */ 496int 497skip_key(int key) 498{ 499 int c, 500 skip, 501 retval; 502 503 for (skip = retval = NO; GETC(!=, EOF);) 504 switch(c) { 505 case '\\': /* a backslash escapes anything */ 506 skip = !skip; /* we toggle in case it's "\\" */ 507 break; 508 case ';': /* special case for yacc; if one */ 509 case '|': /* of these chars occurs, we may */ 510 retval = YES; /* have moved out of the rule */ 511 break; /* not used by C */ 512 case '\'': 513 case '"': 514 /* skip strings and character constants */ 515 skip_string(c); 516 break; 517 case '/': 518 /* skip comments */ 519 if (GETC(==, '*') || c == '/') { 520 skip_comment(c); 521 break; 522 } 523 (void)ungetc(c, inf); 524 c = '/'; 525 goto norm; 526 case '\n': 527 SETLINE; 528 /*FALLTHROUGH*/ 529 default: 530 norm: 531 if (c == key && !skip) 532 return (retval); 533 skip = NO; 534 } 535 return (retval); 536} 537