11590Srgrimes/* 21590Srgrimes * Copyright (c) 1987, 1993, 1994 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 4. Neither the name of the University nor the names of its contributors 141590Srgrimes * may be used to endorse or promote products derived from this software 151590Srgrimes * without specific prior written permission. 161590Srgrimes * 171590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 181590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 191590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 211590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 221590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 231590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 241590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 251590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 261590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 271590Srgrimes * SUCH DAMAGE. 281590Srgrimes */ 291590Srgrimes 3087628Sdwmalone#if 0 311590Srgrimes#ifndef lint 321590Srgrimesstatic char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 3328625Ssteve#endif 3487628Sdwmalone#endif 351590Srgrimes 3687628Sdwmalone#include <sys/cdefs.h> 3787628Sdwmalone__FBSDID("$FreeBSD$"); 3887628Sdwmalone 391590Srgrimes#include <limits.h> 401590Srgrimes#include <stdio.h> 4191382Sdwmalone#include <string.h> 421590Srgrimes 431590Srgrimes#include "ctags.h" 441590Srgrimes 4592920Simpstatic int func_entry(void); 4692920Simpstatic void hash_entry(void); 4792920Simpstatic void skip_string(int); 4892920Simpstatic int str_entry(int); 491590Srgrimes 501590Srgrimes/* 511590Srgrimes * c_entries -- 521590Srgrimes * read .c and .h files and call appropriate routines 531590Srgrimes */ 541590Srgrimesvoid 55100822Sdwmalonec_entries(void) 561590Srgrimes{ 571590Srgrimes int c; /* current character */ 581590Srgrimes int level; /* brace level */ 591590Srgrimes int token; /* if reading a token */ 601590Srgrimes int t_def; /* if reading a typedef */ 611590Srgrimes int t_level; /* typedef's brace level */ 621590Srgrimes char *sp; /* buffer pointer */ 631590Srgrimes char tok[MAXTOKEN]; /* token buffer */ 641590Srgrimes 651590Srgrimes lineftell = ftell(inf); 661590Srgrimes sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 671590Srgrimes while (GETC(!=, EOF)) { 681590Srgrimes switch (c) { 691590Srgrimes /* 701590Srgrimes * Here's where it DOESN'T handle: { 711590Srgrimes * foo(a) 721590Srgrimes * { 731590Srgrimes * #ifdef notdef 741590Srgrimes * } 751590Srgrimes * #endif 761590Srgrimes * if (a) 771590Srgrimes * puts("hello, world"); 781590Srgrimes * } 791590Srgrimes */ 801590Srgrimes case '{': 811590Srgrimes ++level; 821590Srgrimes goto endtok; 831590Srgrimes case '}': 841590Srgrimes /* 851590Srgrimes * if level goes below zero, try and fix 861590Srgrimes * it, even though we've already messed up 871590Srgrimes */ 881590Srgrimes if (--level < 0) 891590Srgrimes level = 0; 901590Srgrimes goto endtok; 911590Srgrimes 921590Srgrimes case '\n': 931590Srgrimes SETLINE; 941590Srgrimes /* 951590Srgrimes * the above 3 cases are similar in that they 961590Srgrimes * are special characters that also end tokens. 971590Srgrimes */ 981590Srgrimes endtok: if (sp > tok) { 991590Srgrimes *sp = EOS; 1001590Srgrimes token = YES; 1011590Srgrimes sp = tok; 1021590Srgrimes } 1031590Srgrimes else 1041590Srgrimes token = NO; 1051590Srgrimes continue; 1061590Srgrimes 1071590Srgrimes /* 1081590Srgrimes * We ignore quoted strings and character constants 1091590Srgrimes * completely. 1101590Srgrimes */ 1111590Srgrimes case '"': 1121590Srgrimes case '\'': 113166503Srse skip_string(c); 1141590Srgrimes break; 1151590Srgrimes 1161590Srgrimes /* 1171590Srgrimes * comments can be fun; note the state is unchanged after 1181590Srgrimes * return, in case we found: 1191590Srgrimes * "foo() XX comment XX { int bar; }" 1201590Srgrimes */ 1211590Srgrimes case '/': 12291189Sgshapiro if (GETC(==, '*') || c == '/') { 12391189Sgshapiro skip_comment(c); 1241590Srgrimes continue; 1251590Srgrimes } 1261590Srgrimes (void)ungetc(c, inf); 1271590Srgrimes c = '/'; 1281590Srgrimes goto storec; 1291590Srgrimes 1301590Srgrimes /* hash marks flag #define's. */ 1311590Srgrimes case '#': 1321590Srgrimes if (sp == tok) { 1331590Srgrimes hash_entry(); 1341590Srgrimes break; 1351590Srgrimes } 1361590Srgrimes goto storec; 1371590Srgrimes 1381590Srgrimes /* 1391590Srgrimes * if we have a current token, parenthesis on 1401590Srgrimes * level zero indicates a function. 1411590Srgrimes */ 1421590Srgrimes case '(': 1431590Srgrimes if (!level && token) { 1441590Srgrimes int curline; 1451590Srgrimes 1461590Srgrimes if (sp != tok) 1471590Srgrimes *sp = EOS; 1481590Srgrimes /* 1491590Srgrimes * grab the line immediately, we may 1501590Srgrimes * already be wrong, for example, 1511590Srgrimes * foo\n 1521590Srgrimes * (arg1, 1531590Srgrimes */ 1541590Srgrimes getline(); 1551590Srgrimes curline = lineno; 1561590Srgrimes if (func_entry()) { 1571590Srgrimes ++level; 1581590Srgrimes pfnote(tok, curline); 1591590Srgrimes } 1601590Srgrimes break; 1611590Srgrimes } 1621590Srgrimes goto storec; 1631590Srgrimes 1641590Srgrimes /* 1651590Srgrimes * semi-colons indicate the end of a typedef; if we find a 1661590Srgrimes * typedef we search for the next semi-colon of the same 1671590Srgrimes * level as the typedef. Ignoring "structs", they are 1681590Srgrimes * tricky, since you can find: 1691590Srgrimes * 1701590Srgrimes * "typedef long time_t;" 1711590Srgrimes * "typedef unsigned int u_int;" 1721590Srgrimes * "typedef unsigned int u_int [10];" 1731590Srgrimes * 1741590Srgrimes * If looking at a typedef, we save a copy of the last token 1751590Srgrimes * found. Then, when we find the ';' we take the current 1761590Srgrimes * token if it starts with a valid token name, else we take 1771590Srgrimes * the one we saved. There's probably some reasonable 1781590Srgrimes * alternative to this... 1791590Srgrimes */ 1801590Srgrimes case ';': 1811590Srgrimes if (t_def && level == t_level) { 1821590Srgrimes t_def = NO; 1831590Srgrimes getline(); 1841590Srgrimes if (sp != tok) 1851590Srgrimes *sp = EOS; 1861590Srgrimes pfnote(tok, lineno); 1871590Srgrimes break; 1881590Srgrimes } 1891590Srgrimes goto storec; 1901590Srgrimes 1911590Srgrimes /* 1921590Srgrimes * store characters until one that can't be part of a token 1931590Srgrimes * comes along; check the current token against certain 1941590Srgrimes * reserved words. 1951590Srgrimes */ 1961590Srgrimes default: 19728625Ssteve /* ignore whitespace */ 19828625Ssteve if (c == ' ' || c == '\t') { 19928625Ssteve int save = c; 20028625Ssteve while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 20128625Ssteve ; 20228625Ssteve if (c == EOF) 20328625Ssteve return; 20428625Ssteve (void)ungetc(c, inf); 20528625Ssteve c = save; 20628625Ssteve } 2071590Srgrimes storec: if (!intoken(c)) { 2081590Srgrimes if (sp == tok) 2091590Srgrimes break; 2101590Srgrimes *sp = EOS; 2111590Srgrimes if (tflag) { 2121590Srgrimes /* no typedefs inside typedefs */ 2131590Srgrimes if (!t_def && 2141590Srgrimes !memcmp(tok, "typedef",8)) { 2151590Srgrimes t_def = YES; 2161590Srgrimes t_level = level; 2171590Srgrimes break; 2181590Srgrimes } 2191590Srgrimes /* catch "typedef struct" */ 2201590Srgrimes if ((!t_def || t_level < level) 2211590Srgrimes && (!memcmp(tok, "struct", 7) 2221590Srgrimes || !memcmp(tok, "union", 6) 2231590Srgrimes || !memcmp(tok, "enum", 5))) { 2241590Srgrimes /* 2251590Srgrimes * get line immediately; 2261590Srgrimes * may change before '{' 2271590Srgrimes */ 2281590Srgrimes getline(); 2291590Srgrimes if (str_entry(c)) 2301590Srgrimes ++level; 2311590Srgrimes break; 2321590Srgrimes /* } */ 2331590Srgrimes } 2341590Srgrimes } 2351590Srgrimes sp = tok; 2361590Srgrimes } 2371590Srgrimes else if (sp != tok || begtoken(c)) { 23897574Stjr if (sp == tok + sizeof tok - 1) 23997574Stjr /* Too long -- truncate it */ 24097574Stjr *sp = EOS; 24197574Stjr else 24297574Stjr *sp++ = c; 2431590Srgrimes token = YES; 2441590Srgrimes } 2451590Srgrimes continue; 2461590Srgrimes } 2471590Srgrimes 2481590Srgrimes sp = tok; 2491590Srgrimes token = NO; 2501590Srgrimes } 2511590Srgrimes} 2521590Srgrimes 2531590Srgrimes/* 2541590Srgrimes * func_entry -- 2551590Srgrimes * handle a function reference 2561590Srgrimes */ 2571590Srgrimesstatic int 258100822Sdwmalonefunc_entry(void) 2591590Srgrimes{ 2601590Srgrimes int c; /* current character */ 2611590Srgrimes int level = 0; /* for matching '()' */ 2621590Srgrimes 2631590Srgrimes /* 2641590Srgrimes * Find the end of the assumed function declaration. 2651590Srgrimes * Note that ANSI C functions can have type definitions so keep 2661590Srgrimes * track of the parentheses nesting level. 2671590Srgrimes */ 2681590Srgrimes while (GETC(!=, EOF)) { 2691590Srgrimes switch (c) { 2701590Srgrimes case '\'': 2711590Srgrimes case '"': 2721590Srgrimes /* skip strings and character constants */ 2731590Srgrimes skip_string(c); 2741590Srgrimes break; 2751590Srgrimes case '/': 2761590Srgrimes /* skip comments */ 27791189Sgshapiro if (GETC(==, '*') || c == '/') 27891189Sgshapiro skip_comment(c); 2791590Srgrimes break; 2801590Srgrimes case '(': 2811590Srgrimes level++; 2821590Srgrimes break; 2831590Srgrimes case ')': 2841590Srgrimes if (level == 0) 2851590Srgrimes goto fnd; 2861590Srgrimes level--; 2871590Srgrimes break; 2881590Srgrimes case '\n': 2891590Srgrimes SETLINE; 2901590Srgrimes } 2911590Srgrimes } 2921590Srgrimes return (NO); 2931590Srgrimesfnd: 2941590Srgrimes /* 2951590Srgrimes * we assume that the character after a function's right paren 2961590Srgrimes * is a token character if it's a function and a non-token 2971590Srgrimes * character if it's a declaration. Comments don't count... 2981590Srgrimes */ 2991590Srgrimes for (;;) { 3001590Srgrimes while (GETC(!=, EOF) && iswhite(c)) 3011590Srgrimes if (c == '\n') 3021590Srgrimes SETLINE; 3031590Srgrimes if (intoken(c) || c == '{') 3041590Srgrimes break; 30591189Sgshapiro if (c == '/' && (GETC(==, '*') || c == '/')) 30691189Sgshapiro skip_comment(c); 3071590Srgrimes else { /* don't ever "read" '/' */ 3081590Srgrimes (void)ungetc(c, inf); 3091590Srgrimes return (NO); 3101590Srgrimes } 3111590Srgrimes } 3121590Srgrimes if (c != '{') 3131590Srgrimes (void)skip_key('{'); 3141590Srgrimes return (YES); 3151590Srgrimes} 3161590Srgrimes 3171590Srgrimes/* 3181590Srgrimes * hash_entry -- 3191590Srgrimes * handle a line starting with a '#' 3201590Srgrimes */ 3211590Srgrimesstatic void 322100822Sdwmalonehash_entry(void) 3231590Srgrimes{ 3241590Srgrimes int c; /* character read */ 3251590Srgrimes int curline; /* line started on */ 3261590Srgrimes char *sp; /* buffer pointer */ 3271590Srgrimes char tok[MAXTOKEN]; /* storage buffer */ 3281590Srgrimes 32928625Ssteve /* ignore leading whitespace */ 33028625Ssteve while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 33128625Ssteve ; 33228625Ssteve (void)ungetc(c, inf); 33328625Ssteve 3341590Srgrimes curline = lineno; 3351590Srgrimes for (sp = tok;;) { /* get next token */ 3361590Srgrimes if (GETC(==, EOF)) 3371590Srgrimes return; 3381590Srgrimes if (iswhite(c)) 3391590Srgrimes break; 34097574Stjr if (sp == tok + sizeof tok - 1) 34197574Stjr /* Too long -- truncate it */ 34297574Stjr *sp = EOS; 34397574Stjr else 34497574Stjr *sp++ = c; 3451590Srgrimes } 3461590Srgrimes *sp = EOS; 3471590Srgrimes if (memcmp(tok, "define", 6)) /* only interested in #define's */ 3481590Srgrimes goto skip; 3491590Srgrimes for (;;) { /* this doesn't handle "#define \n" */ 3501590Srgrimes if (GETC(==, EOF)) 3511590Srgrimes return; 3521590Srgrimes if (!iswhite(c)) 3531590Srgrimes break; 3541590Srgrimes } 3551590Srgrimes for (sp = tok;;) { /* get next token */ 35697574Stjr if (sp == tok + sizeof tok - 1) 35797574Stjr /* Too long -- truncate it */ 35897574Stjr *sp = EOS; 35997574Stjr else 36097574Stjr *sp++ = c; 3611590Srgrimes if (GETC(==, EOF)) 3621590Srgrimes return; 3631590Srgrimes /* 3641590Srgrimes * this is where it DOESN'T handle 3651590Srgrimes * "#define \n" 3661590Srgrimes */ 3671590Srgrimes if (!intoken(c)) 3681590Srgrimes break; 3691590Srgrimes } 3701590Srgrimes *sp = EOS; 3711590Srgrimes if (dflag || c == '(') { /* only want macros */ 3721590Srgrimes getline(); 3731590Srgrimes pfnote(tok, curline); 3741590Srgrimes } 3751590Srgrimesskip: if (c == '\n') { /* get rid of rest of define */ 3761590Srgrimes SETLINE 3771590Srgrimes if (*(sp - 1) != '\\') 3781590Srgrimes return; 3791590Srgrimes } 3801590Srgrimes (void)skip_key('\n'); 3811590Srgrimes} 3821590Srgrimes 3831590Srgrimes/* 3841590Srgrimes * str_entry -- 3851590Srgrimes * handle a struct, union or enum entry 3861590Srgrimes */ 3871590Srgrimesstatic int 388100822Sdwmalonestr_entry(int c) /* c is current character */ 3891590Srgrimes{ 3901590Srgrimes int curline; /* line started on */ 3911590Srgrimes char *sp; /* buffer pointer */ 3921590Srgrimes char tok[LINE_MAX]; /* storage buffer */ 3931590Srgrimes 3941590Srgrimes curline = lineno; 3951590Srgrimes while (iswhite(c)) 3961590Srgrimes if (GETC(==, EOF)) 3971590Srgrimes return (NO); 3981590Srgrimes if (c == '{') /* it was "struct {" */ 3991590Srgrimes return (YES); 4001590Srgrimes for (sp = tok;;) { /* get next token */ 40197574Stjr if (sp == tok + sizeof tok - 1) 40297574Stjr /* Too long -- truncate it */ 40397574Stjr *sp = EOS; 40497574Stjr else 40597574Stjr *sp++ = c; 4061590Srgrimes if (GETC(==, EOF)) 4071590Srgrimes return (NO); 4081590Srgrimes if (!intoken(c)) 4091590Srgrimes break; 4101590Srgrimes } 4111590Srgrimes switch (c) { 4121590Srgrimes case '{': /* it was "struct foo{" */ 4131590Srgrimes --sp; 4141590Srgrimes break; 4151590Srgrimes case '\n': /* it was "struct foo\n" */ 4161590Srgrimes SETLINE; 4171590Srgrimes /*FALLTHROUGH*/ 4181590Srgrimes default: /* probably "struct foo " */ 4191590Srgrimes while (GETC(!=, EOF)) 4201590Srgrimes if (!iswhite(c)) 4211590Srgrimes break; 4221590Srgrimes if (c != '{') { 4231590Srgrimes (void)ungetc(c, inf); 4241590Srgrimes return (NO); 4251590Srgrimes } 4261590Srgrimes } 4271590Srgrimes *sp = EOS; 4281590Srgrimes pfnote(tok, curline); 4291590Srgrimes return (YES); 4301590Srgrimes} 4311590Srgrimes 4321590Srgrimes/* 4331590Srgrimes * skip_comment -- 4341590Srgrimes * skip over comment 4351590Srgrimes */ 4361590Srgrimesvoid 437100822Sdwmaloneskip_comment(int t) /* t is comment character */ 4381590Srgrimes{ 4391590Srgrimes int c; /* character read */ 4401590Srgrimes int star; /* '*' flag */ 4411590Srgrimes 4421590Srgrimes for (star = 0; GETC(!=, EOF);) 4431590Srgrimes switch(c) { 4441590Srgrimes /* comments don't nest, nor can they be escaped. */ 4451590Srgrimes case '*': 4461590Srgrimes star = YES; 4471590Srgrimes break; 4481590Srgrimes case '/': 44991189Sgshapiro if (star && t == '*') 4501590Srgrimes return; 4511590Srgrimes break; 4521590Srgrimes case '\n': 45391189Sgshapiro if (t == '/') 45491189Sgshapiro return; 4551590Srgrimes SETLINE; 4561590Srgrimes /*FALLTHROUGH*/ 4571590Srgrimes default: 4581590Srgrimes star = NO; 4591590Srgrimes break; 4601590Srgrimes } 4611590Srgrimes} 4621590Srgrimes 4631590Srgrimes/* 4641590Srgrimes * skip_string -- 4651590Srgrimes * skip to the end of a string or character constant. 4661590Srgrimes */ 4671590Srgrimesvoid 468100822Sdwmaloneskip_string(int key) 4691590Srgrimes{ 4701590Srgrimes int c, 4711590Srgrimes skip; 4721590Srgrimes 4731590Srgrimes for (skip = NO; GETC(!=, EOF); ) 4741590Srgrimes switch (c) { 4751590Srgrimes case '\\': /* a backslash escapes anything */ 4761590Srgrimes skip = !skip; /* we toggle in case it's "\\" */ 4771590Srgrimes break; 4781590Srgrimes case '\n': 4791590Srgrimes SETLINE; 4801590Srgrimes /*FALLTHROUGH*/ 4811590Srgrimes default: 4821590Srgrimes if (c == key && !skip) 4831590Srgrimes return; 4841590Srgrimes skip = NO; 4851590Srgrimes } 4861590Srgrimes} 4871590Srgrimes 4881590Srgrimes/* 4891590Srgrimes * skip_key -- 4901590Srgrimes * skip to next char "key" 4911590Srgrimes */ 4921590Srgrimesint 493100822Sdwmaloneskip_key(int key) 4941590Srgrimes{ 4951590Srgrimes int c, 4961590Srgrimes skip, 4971590Srgrimes retval; 4981590Srgrimes 4991590Srgrimes for (skip = retval = NO; GETC(!=, EOF);) 5001590Srgrimes switch(c) { 5011590Srgrimes case '\\': /* a backslash escapes anything */ 5021590Srgrimes skip = !skip; /* we toggle in case it's "\\" */ 5031590Srgrimes break; 5041590Srgrimes case ';': /* special case for yacc; if one */ 5051590Srgrimes case '|': /* of these chars occurs, we may */ 5061590Srgrimes retval = YES; /* have moved out of the rule */ 5071590Srgrimes break; /* not used by C */ 5081590Srgrimes case '\'': 5091590Srgrimes case '"': 5101590Srgrimes /* skip strings and character constants */ 5111590Srgrimes skip_string(c); 5121590Srgrimes break; 5131590Srgrimes case '/': 5141590Srgrimes /* skip comments */ 51591189Sgshapiro if (GETC(==, '*') || c == '/') { 51691189Sgshapiro skip_comment(c); 5171590Srgrimes break; 5181590Srgrimes } 5191590Srgrimes (void)ungetc(c, inf); 5201590Srgrimes c = '/'; 5211590Srgrimes goto norm; 5221590Srgrimes case '\n': 5231590Srgrimes SETLINE; 5241590Srgrimes /*FALLTHROUGH*/ 5251590Srgrimes default: 5261590Srgrimes norm: 5271590Srgrimes if (c == key && !skip) 5281590Srgrimes return (retval); 5291590Srgrimes skip = NO; 5301590Srgrimes } 5311590Srgrimes return (retval); 5321590Srgrimes} 533