1/* $NetBSD: C.c,v 1.18 2009/04/11 12:58:03 lukem Exp $ */ 2 3/* 4 * Copyright (c) 1987, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#if HAVE_NBTOOL_CONFIG_H 33#include "nbtool_config.h" 34#endif 35 36#include <sys/cdefs.h> 37#if defined(__RCSID) && !defined(lint) 38#if 0 39static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 40#else 41__RCSID("$NetBSD: C.c,v 1.18 2009/04/11 12:58:03 lukem Exp $"); 42#endif 43#endif /* not lint */ 44 45#include <limits.h> 46#include <stddef.h> 47#include <stdio.h> 48#include <string.h> 49 50#include "ctags.h" 51 52static int func_entry(void); 53static void hash_entry(void); 54static void skip_string(int); 55static int str_entry(int); 56 57/* 58 * c_entries -- 59 * read .c and .h files and call appropriate routines 60 */ 61void 62c_entries(void) 63{ 64 int c; /* current character */ 65 int level; /* brace level */ 66 int token; /* if reading a token */ 67 int t_def; /* if reading a typedef */ 68 int t_level; /* typedef's brace level */ 69 char *sp; /* buffer pointer */ 70 char tok[MAXTOKEN]; /* token buffer */ 71 72 lineftell = ftell(inf); 73 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 74 while (GETC(!=, EOF)) { 75 switch (c) { 76 /* 77 * Here's where it DOESN'T handle: { 78 * foo(a) 79 * { 80 * #ifdef notdef 81 * } 82 * #endif 83 * if (a) 84 * puts("hello, world"); 85 * } 86 */ 87 case '{': 88 ++level; 89 goto endtok; 90 case '}': 91 /* 92 * if level goes below zero, try and fix 93 * it, even though we've already messed up 94 */ 95 if (--level < 0) 96 level = 0; 97 goto endtok; 98 99 case '\n': 100 SETLINE; 101 /* 102 * the above 3 cases are similar in that they 103 * are special characters that also end tokens. 104 */ 105 endtok: if (sp > tok) { 106 *sp = EOS; 107 token = YES; 108 sp = tok; 109 } 110 else 111 token = NO; 112 continue; 113 114 /* 115 * We ignore quoted strings and character constants 116 * completely. 117 */ 118 case '"': 119 case '\'': 120 (void)skip_string(c); 121 break; 122 123 /* 124 * comments can be fun; note the state is unchanged after 125 * return, in case we found: 126 * "foo() XX comment XX { int bar; }" 127 */ 128 case '/': 129 if (GETC(==, '*')) { 130 skip_comment(c); 131 continue; 132 } else if (c == '/') { 133 skip_comment(c); 134 continue; 135 } 136 (void)ungetc(c, inf); 137 c = '/'; 138 goto storec; 139 140 /* hash marks flag #define's. */ 141 case '#': 142 if (sp == tok) { 143 hash_entry(); 144 break; 145 } 146 goto storec; 147 148 /* 149 * if we have a current token, parenthesis on 150 * level zero indicates a function. 151 */ 152 case '(': 153 do c = getc(inf); 154 while (c != EOF && iswhite(c)); 155 if (c == '*') 156 break; 157 if (c != EOF) 158 ungetc(c, inf); 159 if (!level && token) { 160 int curline; 161 162 if (sp != tok) 163 *sp = EOS; 164 /* 165 * grab the line immediately, we may 166 * already be wrong, for example, 167 * foo\n 168 * (arg1, 169 */ 170 get_line(); 171 curline = lineno; 172 if (func_entry()) { 173 ++level; 174 pfnote(tok, curline); 175 } 176 break; 177 } 178 goto storec; 179 180 /* 181 * semi-colons indicate the end of a typedef; if we find a 182 * typedef we search for the next semi-colon of the same 183 * level as the typedef. Ignoring "structs", they are 184 * tricky, since you can find: 185 * 186 * "typedef long time_t;" 187 * "typedef unsigned int u_int;" 188 * "typedef unsigned int u_int [10];" 189 * 190 * If looking at a typedef, we save a copy of the last token 191 * found. Then, when we find the ';' we take the current 192 * token if it starts with a valid token name, else we take 193 * the one we saved. There's probably some reasonable 194 * alternative to this... 195 */ 196 case ';': 197 if (t_def && level == t_level) { 198 t_def = NO; 199 get_line(); 200 if (sp != tok) 201 *sp = EOS; 202 pfnote(tok, lineno); 203 break; 204 } 205 goto storec; 206 207 /* 208 * store characters until one that can't be part of a token 209 * comes along; check the current token against certain 210 * reserved words. 211 */ 212 default: 213 storec: if (c == EOF) 214 break; 215 if (!intoken(c)) { 216 if (sp == tok) 217 break; 218 *sp = EOS; 219 if (tflag) { 220 /* no typedefs inside typedefs */ 221 if (!t_def && 222 !memcmp(tok, "typedef",8)) { 223 t_def = YES; 224 t_level = level; 225 break; 226 } 227 /* catch "typedef struct" */ 228 if ((!t_def || t_level <= level) 229 && (!memcmp(tok, "struct", 7) 230 || !memcmp(tok, "union", 6) 231 || !memcmp(tok, "enum", 5))) { 232 /* 233 * get line immediately; 234 * may change before '{' 235 */ 236 get_line(); 237 if (str_entry(c)) 238 ++level; 239 break; 240 /* } */ 241 } 242 } 243 sp = tok; 244 } 245 else if (sp != tok || begtoken(c)) { 246 if (sp < tok + sizeof tok) 247 *sp++ = c; 248 token = YES; 249 } 250 continue; 251 } 252 253 sp = tok; 254 token = NO; 255 } 256} 257 258/* 259 * func_entry -- 260 * handle a function reference 261 */ 262static int 263func_entry(void) 264{ 265 int c; /* current character */ 266 int level = 0; /* for matching '()' */ 267 static char attribute[] = "__attribute__"; 268 char maybe_attribute[sizeof attribute + 1], 269 *anext; 270 271 /* 272 * Find the end of the assumed function declaration. 273 * Note that ANSI C functions can have type definitions so keep 274 * track of the parentheses nesting level. 275 */ 276 while (GETC(!=, EOF)) { 277 switch (c) { 278 case '\'': 279 case '"': 280 /* skip strings and character constants */ 281 skip_string(c); 282 break; 283 case '/': 284 /* skip comments */ 285 if (GETC(==, '*')) 286 skip_comment(c); 287 else if (c == '/') 288 skip_comment(c); 289 break; 290 case '(': 291 level++; 292 break; 293 case ')': 294 if (level == 0) 295 goto fnd; 296 level--; 297 break; 298 case '\n': 299 SETLINE; 300 } 301 } 302 return (NO); 303fnd: 304 /* 305 * we assume that the character after a function's right paren 306 * is a token character if it's a function and a non-token 307 * character if it's a declaration. Comments don't count... 308 */ 309 for (anext = maybe_attribute;;) { 310 while (GETC(!=, EOF) && iswhite(c)) 311 if (c == '\n') 312 SETLINE; 313 if (c == EOF) 314 return NO; 315 /* 316 * Recognize the gnu __attribute__ extension, which would 317 * otherwise make the heuristic test DTWT 318 */ 319 if (anext == maybe_attribute) { 320 if (intoken(c)) { 321 *anext++ = c; 322 continue; 323 } 324 } else { 325 if (intoken(c)) { 326 if (anext - maybe_attribute 327 < (ptrdiff_t)(sizeof attribute - 1)) 328 *anext++ = c; 329 else break; 330 continue; 331 } else { 332 *anext++ = '\0'; 333 if (strcmp(maybe_attribute, attribute) == 0) { 334 (void)ungetc(c, inf); 335 return NO; 336 } 337 break; 338 } 339 } 340 if (intoken(c) || c == '{') 341 break; 342 if (c == '/' && GETC(==, '*')) 343 skip_comment(c); 344 else if (c == '/') 345 skip_comment(c); 346 else { /* don't ever "read" '/' */ 347 (void)ungetc(c, inf); 348 return (NO); 349 } 350 } 351 if (c != '{') 352 (void)skip_key('{'); 353 return (YES); 354} 355 356/* 357 * hash_entry -- 358 * handle a line starting with a '#' 359 */ 360static void 361hash_entry(void) 362{ 363 int c; /* character read */ 364 int curline; /* line started on */ 365 char *sp; /* buffer pointer */ 366 char tok[MAXTOKEN]; /* storage buffer */ 367 368 curline = lineno; 369 do if (GETC(==, EOF)) 370 return; 371 while(c != '\n' && iswhite(c)); 372 ungetc(c, inf); 373 for (sp = tok;;) { /* get next token */ 374 if (GETC(==, EOF)) 375 return; 376 if (iswhite(c)) 377 break; 378 if (sp < tok + sizeof tok) 379 *sp++ = c; 380 } 381 if(sp >= tok + sizeof tok) 382 --sp; 383 *sp = EOS; 384 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 385 goto skip; 386 for (;;) { /* this doesn't handle "#define \n" */ 387 if (GETC(==, EOF)) 388 return; 389 if (!iswhite(c)) 390 break; 391 } 392 for (sp = tok;;) { /* get next token */ 393 if(sp < tok + sizeof tok) 394 *sp++ = c; 395 if (GETC(==, EOF)) 396 return; 397 /* 398 * this is where it DOESN'T handle 399 * "#define \n" 400 */ 401 if (!intoken(c)) 402 break; 403 } 404 if(sp >= tok + sizeof tok) 405 --sp; 406 *sp = EOS; 407 if (dflag || c == '(') { /* only want macros */ 408 get_line(); 409 pfnote(tok, curline); 410 } 411skip: if (c == '\n') { /* get rid of rest of define */ 412 SETLINE 413 if (*(sp - 1) != '\\') 414 return; 415 } 416 (void)skip_key('\n'); 417} 418 419/* 420 * str_entry -- 421 * handle a struct, union or enum entry 422 */ 423static int 424str_entry(int c /* current character */) 425{ 426 int curline; /* line started on */ 427 char *sp; /* buffer pointer */ 428 char tok[LINE_MAX]; /* storage buffer */ 429 430 curline = lineno; 431 while (iswhite(c)) 432 if (GETC(==, EOF)) 433 return (NO); 434 if (c == '{') /* it was "struct {" */ 435 return (YES); 436 for (sp = tok;;) { /* get next token */ 437 *sp++ = c; 438 if (GETC(==, EOF)) 439 return (NO); 440 if (!intoken(c)) 441 break; 442 } 443 switch (c) { 444 case '{': /* it was "struct foo{" */ 445 --sp; 446 break; 447 case '\n': /* it was "struct foo\n" */ 448 SETLINE; 449 /*FALLTHROUGH*/ 450 default: /* probably "struct foo " */ 451 while (GETC(!=, EOF)) 452 if (!iswhite(c)) 453 break; 454 if (c != '{') { 455 (void)ungetc(c, inf); 456 return (NO); 457 } 458 } 459 *sp = EOS; 460 pfnote(tok, curline); 461 return (YES); 462} 463 464/* 465 * skip_comment -- 466 * skip over comment 467 */ 468void 469skip_comment(int commenttype) 470{ 471 int c; /* character read */ 472 int star; /* '*' flag */ 473 474 for (star = 0; GETC(!=, EOF);) 475 switch(c) { 476 /* comments don't nest, nor can they be escaped. */ 477 case '*': 478 star = YES; 479 break; 480 case '/': 481 if (commenttype == '*' && star) 482 return; 483 break; 484 case '\n': 485 if (commenttype == '/') { 486 /* 487 * we don't really parse C, so sometimes it 488 * is necessary to see the newline 489 */ 490 ungetc(c, inf); 491 return; 492 } 493 SETLINE; 494 /*FALLTHROUGH*/ 495 default: 496 star = NO; 497 break; 498 } 499} 500 501/* 502 * skip_string -- 503 * skip to the end of a string or character constant. 504 */ 505void 506skip_string(int key) 507{ 508 int c, 509 skip; 510 511 for (skip = NO; GETC(!=, EOF); ) 512 switch (c) { 513 case '\\': /* a backslash escapes anything */ 514 skip = !skip; /* we toggle in case it's "\\" */ 515 break; 516 case '\n': 517 SETLINE; 518 /*FALLTHROUGH*/ 519 default: 520 if (c == key && !skip) 521 return; 522 skip = NO; 523 } 524} 525 526/* 527 * skip_key -- 528 * skip to next char "key" 529 */ 530int 531skip_key(int key) 532{ 533 int c, 534 skip, 535 retval; 536 537 for (skip = retval = NO; GETC(!=, EOF);) 538 switch(c) { 539 case '\\': /* a backslash escapes anything */ 540 skip = !skip; /* we toggle in case it's "\\" */ 541 break; 542 case ';': /* special case for yacc; if one */ 543 case '|': /* of these chars occurs, we may */ 544 retval = YES; /* have moved out of the rule */ 545 break; /* not used by C */ 546 case '\'': 547 case '"': 548 /* skip strings and character constants */ 549 skip_string(c); 550 break; 551 case '/': 552 /* skip comments */ 553 if (GETC(==, '*')) { 554 skip_comment(c); 555 break; 556 } else if (c == '/') { 557 skip_comment(c); 558 break; 559 } 560 (void)ungetc(c, inf); 561 c = '/'; 562 goto norm; 563 case '\n': 564 SETLINE; 565 /*FALLTHROUGH*/ 566 default: 567 norm: 568 if (c == key && !skip) 569 return (retval); 570 skip = NO; 571 } 572 return (retval); 573} 574