C.c revision 1590
1/* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34#ifndef lint 35static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 36#endif /* not lint */ 37 38#include <limits.h> 39#include <stdio.h> 40#include <string.h> 41 42#include "ctags.h" 43 44static int func_entry __P((void)); 45static void hash_entry __P((void)); 46static void skip_string __P((int)); 47static int str_entry __P((int)); 48 49/* 50 * c_entries -- 51 * read .c and .h files and call appropriate routines 52 */ 53void 54c_entries() 55{ 56 int c; /* current character */ 57 int level; /* brace level */ 58 int token; /* if reading a token */ 59 int t_def; /* if reading a typedef */ 60 int t_level; /* typedef's brace level */ 61 char *sp; /* buffer pointer */ 62 char tok[MAXTOKEN]; /* token buffer */ 63 64 lineftell = ftell(inf); 65 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 66 while (GETC(!=, EOF)) { 67 switch (c) { 68 /* 69 * Here's where it DOESN'T handle: { 70 * foo(a) 71 * { 72 * #ifdef notdef 73 * } 74 * #endif 75 * if (a) 76 * puts("hello, world"); 77 * } 78 */ 79 case '{': 80 ++level; 81 goto endtok; 82 case '}': 83 /* 84 * if level goes below zero, try and fix 85 * it, even though we've already messed up 86 */ 87 if (--level < 0) 88 level = 0; 89 goto endtok; 90 91 case '\n': 92 SETLINE; 93 /* 94 * the above 3 cases are similar in that they 95 * are special characters that also end tokens. 96 */ 97 endtok: if (sp > tok) { 98 *sp = EOS; 99 token = YES; 100 sp = tok; 101 } 102 else 103 token = NO; 104 continue; 105 106 /* 107 * We ignore quoted strings and character constants 108 * completely. 109 */ 110 case '"': 111 case '\'': 112 (void)skip_string(c); 113 break; 114 115 /* 116 * comments can be fun; note the state is unchanged after 117 * return, in case we found: 118 * "foo() XX comment XX { int bar; }" 119 */ 120 case '/': 121 if (GETC(==, '*')) { 122 skip_comment(); 123 continue; 124 } 125 (void)ungetc(c, inf); 126 c = '/'; 127 goto storec; 128 129 /* hash marks flag #define's. */ 130 case '#': 131 if (sp == tok) { 132 hash_entry(); 133 break; 134 } 135 goto storec; 136 137 /* 138 * if we have a current token, parenthesis on 139 * level zero indicates a function. 140 */ 141 case '(': 142 if (!level && token) { 143 int curline; 144 145 if (sp != tok) 146 *sp = EOS; 147 /* 148 * grab the line immediately, we may 149 * already be wrong, for example, 150 * foo\n 151 * (arg1, 152 */ 153 getline(); 154 curline = lineno; 155 if (func_entry()) { 156 ++level; 157 pfnote(tok, curline); 158 } 159 break; 160 } 161 goto storec; 162 163 /* 164 * semi-colons indicate the end of a typedef; if we find a 165 * typedef we search for the next semi-colon of the same 166 * level as the typedef. Ignoring "structs", they are 167 * tricky, since you can find: 168 * 169 * "typedef long time_t;" 170 * "typedef unsigned int u_int;" 171 * "typedef unsigned int u_int [10];" 172 * 173 * If looking at a typedef, we save a copy of the last token 174 * found. Then, when we find the ';' we take the current 175 * token if it starts with a valid token name, else we take 176 * the one we saved. There's probably some reasonable 177 * alternative to this... 178 */ 179 case ';': 180 if (t_def && level == t_level) { 181 t_def = NO; 182 getline(); 183 if (sp != tok) 184 *sp = EOS; 185 pfnote(tok, lineno); 186 break; 187 } 188 goto storec; 189 190 /* 191 * store characters until one that can't be part of a token 192 * comes along; check the current token against certain 193 * reserved words. 194 */ 195 default: 196 storec: if (!intoken(c)) { 197 if (sp == tok) 198 break; 199 *sp = EOS; 200 if (tflag) { 201 /* no typedefs inside typedefs */ 202 if (!t_def && 203 !memcmp(tok, "typedef",8)) { 204 t_def = YES; 205 t_level = level; 206 break; 207 } 208 /* catch "typedef struct" */ 209 if ((!t_def || t_level < level) 210 && (!memcmp(tok, "struct", 7) 211 || !memcmp(tok, "union", 6) 212 || !memcmp(tok, "enum", 5))) { 213 /* 214 * get line immediately; 215 * may change before '{' 216 */ 217 getline(); 218 if (str_entry(c)) 219 ++level; 220 break; 221 /* } */ 222 } 223 } 224 sp = tok; 225 } 226 else if (sp != tok || begtoken(c)) { 227 *sp++ = c; 228 token = YES; 229 } 230 continue; 231 } 232 233 sp = tok; 234 token = NO; 235 } 236} 237 238/* 239 * func_entry -- 240 * handle a function reference 241 */ 242static int 243func_entry() 244{ 245 int c; /* current character */ 246 int level = 0; /* for matching '()' */ 247 248 /* 249 * Find the end of the assumed function declaration. 250 * Note that ANSI C functions can have type definitions so keep 251 * track of the parentheses nesting level. 252 */ 253 while (GETC(!=, EOF)) { 254 switch (c) { 255 case '\'': 256 case '"': 257 /* skip strings and character constants */ 258 skip_string(c); 259 break; 260 case '/': 261 /* skip comments */ 262 if (GETC(==, '*')) 263 skip_comment(); 264 break; 265 case '(': 266 level++; 267 break; 268 case ')': 269 if (level == 0) 270 goto fnd; 271 level--; 272 break; 273 case '\n': 274 SETLINE; 275 } 276 } 277 return (NO); 278fnd: 279 /* 280 * we assume that the character after a function's right paren 281 * is a token character if it's a function and a non-token 282 * character if it's a declaration. Comments don't count... 283 */ 284 for (;;) { 285 while (GETC(!=, EOF) && iswhite(c)) 286 if (c == '\n') 287 SETLINE; 288 if (intoken(c) || c == '{') 289 break; 290 if (c == '/' && GETC(==, '*')) 291 skip_comment(); 292 else { /* don't ever "read" '/' */ 293 (void)ungetc(c, inf); 294 return (NO); 295 } 296 } 297 if (c != '{') 298 (void)skip_key('{'); 299 return (YES); 300} 301 302/* 303 * hash_entry -- 304 * handle a line starting with a '#' 305 */ 306static void 307hash_entry() 308{ 309 int c; /* character read */ 310 int curline; /* line started on */ 311 char *sp; /* buffer pointer */ 312 char tok[MAXTOKEN]; /* storage buffer */ 313 314 curline = lineno; 315 for (sp = tok;;) { /* get next token */ 316 if (GETC(==, EOF)) 317 return; 318 if (iswhite(c)) 319 break; 320 *sp++ = c; 321 } 322 *sp = EOS; 323 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 324 goto skip; 325 for (;;) { /* this doesn't handle "#define \n" */ 326 if (GETC(==, EOF)) 327 return; 328 if (!iswhite(c)) 329 break; 330 } 331 for (sp = tok;;) { /* get next token */ 332 *sp++ = c; 333 if (GETC(==, EOF)) 334 return; 335 /* 336 * this is where it DOESN'T handle 337 * "#define \n" 338 */ 339 if (!intoken(c)) 340 break; 341 } 342 *sp = EOS; 343 if (dflag || c == '(') { /* only want macros */ 344 getline(); 345 pfnote(tok, curline); 346 } 347skip: if (c == '\n') { /* get rid of rest of define */ 348 SETLINE 349 if (*(sp - 1) != '\\') 350 return; 351 } 352 (void)skip_key('\n'); 353} 354 355/* 356 * str_entry -- 357 * handle a struct, union or enum entry 358 */ 359static int 360str_entry(c) 361 int c; /* current character */ 362{ 363 int curline; /* line started on */ 364 char *sp; /* buffer pointer */ 365 char tok[LINE_MAX]; /* storage buffer */ 366 367 curline = lineno; 368 while (iswhite(c)) 369 if (GETC(==, EOF)) 370 return (NO); 371 if (c == '{') /* it was "struct {" */ 372 return (YES); 373 for (sp = tok;;) { /* get next token */ 374 *sp++ = c; 375 if (GETC(==, EOF)) 376 return (NO); 377 if (!intoken(c)) 378 break; 379 } 380 switch (c) { 381 case '{': /* it was "struct foo{" */ 382 --sp; 383 break; 384 case '\n': /* it was "struct foo\n" */ 385 SETLINE; 386 /*FALLTHROUGH*/ 387 default: /* probably "struct foo " */ 388 while (GETC(!=, EOF)) 389 if (!iswhite(c)) 390 break; 391 if (c != '{') { 392 (void)ungetc(c, inf); 393 return (NO); 394 } 395 } 396 *sp = EOS; 397 pfnote(tok, curline); 398 return (YES); 399} 400 401/* 402 * skip_comment -- 403 * skip over comment 404 */ 405void 406skip_comment() 407{ 408 int c; /* character read */ 409 int star; /* '*' flag */ 410 411 for (star = 0; GETC(!=, EOF);) 412 switch(c) { 413 /* comments don't nest, nor can they be escaped. */ 414 case '*': 415 star = YES; 416 break; 417 case '/': 418 if (star) 419 return; 420 break; 421 case '\n': 422 SETLINE; 423 /*FALLTHROUGH*/ 424 default: 425 star = NO; 426 break; 427 } 428} 429 430/* 431 * skip_string -- 432 * skip to the end of a string or character constant. 433 */ 434void 435skip_string(key) 436 int key; 437{ 438 int c, 439 skip; 440 441 for (skip = NO; GETC(!=, EOF); ) 442 switch (c) { 443 case '\\': /* a backslash escapes anything */ 444 skip = !skip; /* we toggle in case it's "\\" */ 445 break; 446 case '\n': 447 SETLINE; 448 /*FALLTHROUGH*/ 449 default: 450 if (c == key && !skip) 451 return; 452 skip = NO; 453 } 454} 455 456/* 457 * skip_key -- 458 * skip to next char "key" 459 */ 460int 461skip_key(key) 462 int key; 463{ 464 int c, 465 skip, 466 retval; 467 468 for (skip = retval = NO; GETC(!=, EOF);) 469 switch(c) { 470 case '\\': /* a backslash escapes anything */ 471 skip = !skip; /* we toggle in case it's "\\" */ 472 break; 473 case ';': /* special case for yacc; if one */ 474 case '|': /* of these chars occurs, we may */ 475 retval = YES; /* have moved out of the rule */ 476 break; /* not used by C */ 477 case '\'': 478 case '"': 479 /* skip strings and character constants */ 480 skip_string(c); 481 break; 482 case '/': 483 /* skip comments */ 484 if (GETC(==, '*')) { 485 skip_comment(); 486 break; 487 } 488 (void)ungetc(c, inf); 489 c = '/'; 490 goto norm; 491 case '\n': 492 SETLINE; 493 /*FALLTHROUGH*/ 494 default: 495 norm: 496 if (c == key && !skip) 497 return (retval); 498 skip = NO; 499 } 500 return (retval); 501} 502