11590Srgrimes/*
21590Srgrimes * Copyright (c) 1987, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 4. Neither the name of the University nor the names of its contributors
141590Srgrimes *    may be used to endorse or promote products derived from this software
151590Srgrimes *    without specific prior written permission.
161590Srgrimes *
171590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271590Srgrimes * SUCH DAMAGE.
281590Srgrimes */
291590Srgrimes
3087628Sdwmalone#if 0
311590Srgrimes#ifndef lint
321590Srgrimesstatic char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
3328625Ssteve#endif
3487628Sdwmalone#endif
351590Srgrimes
3687628Sdwmalone#include <sys/cdefs.h>
3787628Sdwmalone__FBSDID("$FreeBSD$");
3887628Sdwmalone
391590Srgrimes#include <limits.h>
401590Srgrimes#include <stdio.h>
4191382Sdwmalone#include <string.h>
421590Srgrimes
431590Srgrimes#include "ctags.h"
441590Srgrimes
/* Helpers used only within this file; all four are declared static here. */
4592920Simpstatic int	func_entry(void);
4692920Simpstatic void	hash_entry(void);
4792920Simpstatic void	skip_string(int);
4892920Simpstatic int	str_entry(int);
491590Srgrimes
501590Srgrimes/*
511590Srgrimes * c_entries --
521590Srgrimes *	read .c and .h files and call appropriate routines
 *
 *	Reads the input one character at a time with GETC, tracking the
 *	brace nesting level and the most recently collected token in tok[].
 *	A '(' that follows a token at brace level zero is handed to
 *	func_entry(); a '#' seen while no token is being collected is
 *	handed to hash_entry(); and, when tflag is set, "typedef",
 *	"struct", "union" and "enum" are recognised so that the names they
 *	introduce can be recorded with pfnote().  Returns at end of input.
531590Srgrimes */
541590Srgrimesvoid
55100822Sdwmalonec_entries(void)
561590Srgrimes{
571590Srgrimes	int	c;			/* current character */
581590Srgrimes	int	level;			/* brace level */
591590Srgrimes	int	token;			/* if reading a token */
601590Srgrimes	int	t_def;			/* if reading a typedef */
611590Srgrimes	int	t_level;		/* typedef's brace level */
621590Srgrimes	char	*sp;			/* buffer pointer */
631590Srgrimes	char	tok[MAXTOKEN];		/* token buffer */
641590Srgrimes
651590Srgrimes	lineftell = ftell(inf);
	/* initial state: empty token, no typedef pending, brace level 0, line 1 */
661590Srgrimes	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
671590Srgrimes	while (GETC(!=, EOF)) {
		/*
		 * Inside this switch, `continue' keeps the token state built
		 * up so far, while `break' falls to the bottom of the loop
		 * where the token buffer and token flag are reset.
		 */
681590Srgrimes		switch (c) {
691590Srgrimes		/*
701590Srgrimes		 * Here's where it DOESN'T handle: {
711590Srgrimes		 *	foo(a)
721590Srgrimes		 *	{
731590Srgrimes		 *	#ifdef notdef
741590Srgrimes		 *		}
751590Srgrimes		 *	#endif
761590Srgrimes		 *		if (a)
771590Srgrimes		 *			puts("hello, world");
781590Srgrimes		 *	}
791590Srgrimes		 */
801590Srgrimes		case '{':
811590Srgrimes			++level;
821590Srgrimes			goto endtok;
831590Srgrimes		case '}':
841590Srgrimes			/*
851590Srgrimes			 * if level goes below zero, try and fix
861590Srgrimes			 * it, even though we've already messed up
871590Srgrimes			 */
881590Srgrimes			if (--level < 0)
891590Srgrimes				level = 0;
901590Srgrimes			goto endtok;
911590Srgrimes
921590Srgrimes		case '\n':
931590Srgrimes			SETLINE;
941590Srgrimes			/*
951590Srgrimes			 * the above 3 cases are similar in that they
961590Srgrimes			 * are special characters that also end tokens.
971590Srgrimes			 */
			/*
			 * If characters were being collected, terminate them
			 * and remember that a complete token is available;
			 * otherwise there is no current token.
			 */
981590Srgrimes	endtok:			if (sp > tok) {
991590Srgrimes				*sp = EOS;
1001590Srgrimes				token = YES;
1011590Srgrimes				sp = tok;
1021590Srgrimes			}
1031590Srgrimes			else
1041590Srgrimes				token = NO;
1051590Srgrimes			continue;
1061590Srgrimes
1071590Srgrimes		/*
1081590Srgrimes		 * We ignore quoted strings and character constants
1091590Srgrimes		 * completely.
1101590Srgrimes		 */
1111590Srgrimes		case '"':
1121590Srgrimes		case '\'':
113166503Srse			skip_string(c);
1141590Srgrimes			break;
1151590Srgrimes
1161590Srgrimes		/*
1171590Srgrimes		 * comments can be fun; note the state is unchanged after
1181590Srgrimes		 * return, in case we found:
1191590Srgrimes		 *	"foo() XX comment XX { int bar; }"
1201590Srgrimes		 */
1211590Srgrimes		case '/':
12291189Sgshapiro			if (GETC(==, '*') || c == '/') {
12391189Sgshapiro				skip_comment(c);
1241590Srgrimes				continue;
1251590Srgrimes			}
			/* not a comment: push the lookahead back, treat '/' normally */
1261590Srgrimes			(void)ungetc(c, inf);
1271590Srgrimes			c = '/';
1281590Srgrimes			goto storec;
1291590Srgrimes
1301590Srgrimes		/* hash marks flag #define's. */
1311590Srgrimes		case '#':
			/* only when no token characters are pending in tok[] */
1321590Srgrimes			if (sp == tok) {
1331590Srgrimes				hash_entry();
1341590Srgrimes				break;
1351590Srgrimes			}
1361590Srgrimes			goto storec;
1371590Srgrimes
1381590Srgrimes		/*
1391590Srgrimes		 * if we have a current token, parenthesis on
1401590Srgrimes		 * level zero indicates a function.
1411590Srgrimes		 */
1421590Srgrimes		case '(':
1431590Srgrimes			if (!level && token) {
1441590Srgrimes				int	curline;
1451590Srgrimes
				/*
				 * If sp == tok the previous token is already
				 * terminated in tok[] and is used as is.
				 */
1461590Srgrimes				if (sp != tok)
1471590Srgrimes					*sp = EOS;
1481590Srgrimes				/*
1491590Srgrimes				 * grab the line immediately, we may
1501590Srgrimes				 * already be wrong, for example,
1511590Srgrimes				 *	foo\n
1521590Srgrimes				 *	(arg1,
1531590Srgrimes				 */
1541590Srgrimes				getline();
1551590Srgrimes				curline = lineno;
				/*
				 * func_entry() returns YES only after reading
				 * (or skipping to) the body's opening '{', so
				 * that brace is accounted for here.
				 */
1561590Srgrimes				if (func_entry()) {
1571590Srgrimes					++level;
1581590Srgrimes					pfnote(tok, curline);
1591590Srgrimes				}
1601590Srgrimes				break;
1611590Srgrimes			}
1621590Srgrimes			goto storec;
1631590Srgrimes
1641590Srgrimes		/*
1651590Srgrimes		 * semi-colons indicate the end of a typedef; if we find a
1661590Srgrimes		 * typedef we search for the next semi-colon of the same
1671590Srgrimes		 * level as the typedef.  Ignoring "structs", they are
1681590Srgrimes		 * tricky, since you can find:
1691590Srgrimes		 *
1701590Srgrimes		 *	"typedef long time_t;"
1711590Srgrimes		 *	"typedef unsigned int u_int;"
1721590Srgrimes		 *	"typedef unsigned int u_int [10];"
1731590Srgrimes		 *
1741590Srgrimes		 * If looking at a typedef, we save a copy of the last token
1751590Srgrimes		 * found.  Then, when we find the ';' we take the current
1761590Srgrimes		 * token if it starts with a valid token name, else we take
1771590Srgrimes		 * the one we saved.  There's probably some reasonable
1781590Srgrimes		 * alternative to this...
1791590Srgrimes		 */
1801590Srgrimes		case ';':
1811590Srgrimes			if (t_def && level == t_level) {
1821590Srgrimes				t_def = NO;
1831590Srgrimes				getline();
1841590Srgrimes				if (sp != tok)
1851590Srgrimes					*sp = EOS;
1861590Srgrimes				pfnote(tok, lineno);
1871590Srgrimes				break;
1881590Srgrimes			}
1891590Srgrimes			goto storec;
1901590Srgrimes
1911590Srgrimes		/*
1921590Srgrimes		 * store characters until one that can't be part of a token
1931590Srgrimes		 * comes along; check the current token against certain
1941590Srgrimes		 * reserved words.
1951590Srgrimes		 */
1961590Srgrimes		default:
19728625Ssteve			/* ignore whitespace */
			/*
			 * A run of blanks and tabs is collapsed: the first
			 * non-blank character is pushed back and the run is
			 * processed below as its first character.
			 */
19828625Ssteve			if (c == ' ' || c == '\t') {
19928625Ssteve				int save = c;
20028625Ssteve				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
20128625Ssteve					;
20228625Ssteve				if (c == EOF)
20328625Ssteve					return;
20428625Ssteve				(void)ungetc(c, inf);
20528625Ssteve				c = save;
20628625Ssteve			}
2071590Srgrimes	storec:		if (!intoken(c)) {
				/* a non-token character with nothing collected */
2081590Srgrimes				if (sp == tok)
2091590Srgrimes					break;
2101590Srgrimes				*sp = EOS;
2111590Srgrimes				if (tflag) {
2121590Srgrimes					/* no typedefs inside typedefs */
					/*
					 * The lengths passed to memcmp()
					 * include the terminating NUL, so
					 * only the exact keyword matches.
					 */
2131590Srgrimes					if (!t_def &&
2141590Srgrimes						   !memcmp(tok, "typedef",8)) {
2151590Srgrimes						t_def = YES;
2161590Srgrimes						t_level = level;
2171590Srgrimes						break;
2181590Srgrimes					}
2191590Srgrimes					/* catch "typedef struct" */
2201590Srgrimes					if ((!t_def || t_level < level)
2211590Srgrimes					    && (!memcmp(tok, "struct", 7)
2221590Srgrimes					    || !memcmp(tok, "union", 6)
2231590Srgrimes					    || !memcmp(tok, "enum", 5))) {
2241590Srgrimes						/*
2251590Srgrimes						 * get line immediately;
2261590Srgrimes						 * may change before '{'
2271590Srgrimes						 */
2281590Srgrimes						getline();
						/*
						 * str_entry() returns YES only
						 * once it has read the '{'.
						 */
2291590Srgrimes						if (str_entry(c))
2301590Srgrimes							++level;
2311590Srgrimes						break;
2321590Srgrimes						/* } */
2331590Srgrimes					}
2341590Srgrimes				}
				/* token finished; tok[] keeps it, collection restarts */
2351590Srgrimes				sp = tok;
2361590Srgrimes			}
2371590Srgrimes			else if (sp != tok || begtoken(c)) {
23897574Stjr				if (sp == tok + sizeof tok - 1)
23997574Stjr					/* Too long -- truncate it */
24097574Stjr					*sp = EOS;
24197574Stjr				else
24297574Stjr					*sp++ = c;
2431590Srgrimes				token = YES;
2441590Srgrimes			}
2451590Srgrimes			continue;
2461590Srgrimes		}
2471590Srgrimes
		/* reached by `break' out of the switch: discard the token state */
2481590Srgrimes		sp = tok;
2491590Srgrimes		token = NO;
2501590Srgrimes	}
2511590Srgrimes}
2521590Srgrimes
2531590Srgrimes/*
2541590Srgrimes * func_entry --
2551590Srgrimes *	handle a function reference
2561590Srgrimes */
2571590Srgrimesstatic int
258100822Sdwmalonefunc_entry(void)
2591590Srgrimes{
2601590Srgrimes	int	c;			/* current character */
2611590Srgrimes	int	level = 0;		/* for matching '()' */
2621590Srgrimes
2631590Srgrimes	/*
2641590Srgrimes	 * Find the end of the assumed function declaration.
2651590Srgrimes	 * Note that ANSI C functions can have type definitions so keep
2661590Srgrimes	 * track of the parentheses nesting level.
2671590Srgrimes	 */
2681590Srgrimes	while (GETC(!=, EOF)) {
2691590Srgrimes		switch (c) {
2701590Srgrimes		case '\'':
2711590Srgrimes		case '"':
2721590Srgrimes			/* skip strings and character constants */
2731590Srgrimes			skip_string(c);
2741590Srgrimes			break;
2751590Srgrimes		case '/':
2761590Srgrimes			/* skip comments */
27791189Sgshapiro			if (GETC(==, '*') || c == '/')
27891189Sgshapiro				skip_comment(c);
2791590Srgrimes			break;
2801590Srgrimes		case '(':
2811590Srgrimes			level++;
2821590Srgrimes			break;
2831590Srgrimes		case ')':
2841590Srgrimes			if (level == 0)
2851590Srgrimes				goto fnd;
2861590Srgrimes			level--;
2871590Srgrimes			break;
2881590Srgrimes		case '\n':
2891590Srgrimes			SETLINE;
2901590Srgrimes		}
2911590Srgrimes	}
2921590Srgrimes	return (NO);
2931590Srgrimesfnd:
2941590Srgrimes	/*
2951590Srgrimes	 * we assume that the character after a function's right paren
2961590Srgrimes	 * is a token character if it's a function and a non-token
2971590Srgrimes	 * character if it's a declaration.  Comments don't count...
2981590Srgrimes	 */
2991590Srgrimes	for (;;) {
3001590Srgrimes		while (GETC(!=, EOF) && iswhite(c))
3011590Srgrimes			if (c == '\n')
3021590Srgrimes				SETLINE;
3031590Srgrimes		if (intoken(c) || c == '{')
3041590Srgrimes			break;
30591189Sgshapiro		if (c == '/' && (GETC(==, '*') || c == '/'))
30691189Sgshapiro			skip_comment(c);
3071590Srgrimes		else {				/* don't ever "read" '/' */
3081590Srgrimes			(void)ungetc(c, inf);
3091590Srgrimes			return (NO);
3101590Srgrimes		}
3111590Srgrimes	}
3121590Srgrimes	if (c != '{')
3131590Srgrimes		(void)skip_key('{');
3141590Srgrimes	return (YES);
3151590Srgrimes}
3161590Srgrimes
3171590Srgrimes/*
3181590Srgrimes * hash_entry --
3191590Srgrimes *	handle a line starting with a '#'
3201590Srgrimes */
3211590Srgrimesstatic void
322100822Sdwmalonehash_entry(void)
3231590Srgrimes{
3241590Srgrimes	int	c;			/* character read */
3251590Srgrimes	int	curline;		/* line started on */
3261590Srgrimes	char	*sp;			/* buffer pointer */
3271590Srgrimes	char	tok[MAXTOKEN];		/* storage buffer */
3281590Srgrimes
32928625Ssteve	/* ignore leading whitespace */
33028625Ssteve	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
33128625Ssteve		;
33228625Ssteve	(void)ungetc(c, inf);
33328625Ssteve
3341590Srgrimes	curline = lineno;
3351590Srgrimes	for (sp = tok;;) {		/* get next token */
3361590Srgrimes		if (GETC(==, EOF))
3371590Srgrimes			return;
3381590Srgrimes		if (iswhite(c))
3391590Srgrimes			break;
34097574Stjr		if (sp == tok + sizeof tok - 1)
34197574Stjr			/* Too long -- truncate it */
34297574Stjr			*sp = EOS;
34397574Stjr		else
34497574Stjr			*sp++ = c;
3451590Srgrimes	}
3461590Srgrimes	*sp = EOS;
3471590Srgrimes	if (memcmp(tok, "define", 6))	/* only interested in #define's */
3481590Srgrimes		goto skip;
3491590Srgrimes	for (;;) {			/* this doesn't handle "#define \n" */
3501590Srgrimes		if (GETC(==, EOF))
3511590Srgrimes			return;
3521590Srgrimes		if (!iswhite(c))
3531590Srgrimes			break;
3541590Srgrimes	}
3551590Srgrimes	for (sp = tok;;) {		/* get next token */
35697574Stjr		if (sp == tok + sizeof tok - 1)
35797574Stjr			/* Too long -- truncate it */
35897574Stjr			*sp = EOS;
35997574Stjr		else
36097574Stjr			*sp++ = c;
3611590Srgrimes		if (GETC(==, EOF))
3621590Srgrimes			return;
3631590Srgrimes		/*
3641590Srgrimes		 * this is where it DOESN'T handle
3651590Srgrimes		 * "#define \n"
3661590Srgrimes		 */
3671590Srgrimes		if (!intoken(c))
3681590Srgrimes			break;
3691590Srgrimes	}
3701590Srgrimes	*sp = EOS;
3711590Srgrimes	if (dflag || c == '(') {	/* only want macros */
3721590Srgrimes		getline();
3731590Srgrimes		pfnote(tok, curline);
3741590Srgrimes	}
3751590Srgrimesskip:	if (c == '\n') {		/* get rid of rest of define */
3761590Srgrimes		SETLINE
3771590Srgrimes		if (*(sp - 1) != '\\')
3781590Srgrimes			return;
3791590Srgrimes	}
3801590Srgrimes	(void)skip_key('\n');
3811590Srgrimes}
3821590Srgrimes
3831590Srgrimes/*
3841590Srgrimes * str_entry --
3851590Srgrimes *	handle a struct, union or enum entry
3861590Srgrimes */
3871590Srgrimesstatic int
388100822Sdwmalonestr_entry(int c) /* c is current character */
3891590Srgrimes{
3901590Srgrimes	int	curline;		/* line started on */
3911590Srgrimes	char	*sp;			/* buffer pointer */
3921590Srgrimes	char	tok[LINE_MAX];		/* storage buffer */
3931590Srgrimes
3941590Srgrimes	curline = lineno;
3951590Srgrimes	while (iswhite(c))
3961590Srgrimes		if (GETC(==, EOF))
3971590Srgrimes			return (NO);
3981590Srgrimes	if (c == '{')		/* it was "struct {" */
3991590Srgrimes		return (YES);
4001590Srgrimes	for (sp = tok;;) {		/* get next token */
40197574Stjr		if (sp == tok + sizeof tok - 1)
40297574Stjr			/* Too long -- truncate it */
40397574Stjr			*sp = EOS;
40497574Stjr		else
40597574Stjr			*sp++ = c;
4061590Srgrimes		if (GETC(==, EOF))
4071590Srgrimes			return (NO);
4081590Srgrimes		if (!intoken(c))
4091590Srgrimes			break;
4101590Srgrimes	}
4111590Srgrimes	switch (c) {
4121590Srgrimes		case '{':		/* it was "struct foo{" */
4131590Srgrimes			--sp;
4141590Srgrimes			break;
4151590Srgrimes		case '\n':		/* it was "struct foo\n" */
4161590Srgrimes			SETLINE;
4171590Srgrimes			/*FALLTHROUGH*/
4181590Srgrimes		default:		/* probably "struct foo " */
4191590Srgrimes			while (GETC(!=, EOF))
4201590Srgrimes				if (!iswhite(c))
4211590Srgrimes					break;
4221590Srgrimes			if (c != '{') {
4231590Srgrimes				(void)ungetc(c, inf);
4241590Srgrimes				return (NO);
4251590Srgrimes			}
4261590Srgrimes	}
4271590Srgrimes	*sp = EOS;
4281590Srgrimes	pfnote(tok, curline);
4291590Srgrimes	return (YES);
4301590Srgrimes}
4311590Srgrimes
4321590Srgrimes/*
4331590Srgrimes * skip_comment --
4341590Srgrimes *	skip over comment
4351590Srgrimes */
4361590Srgrimesvoid
437100822Sdwmaloneskip_comment(int t) /* t is comment character */
4381590Srgrimes{
4391590Srgrimes	int	c;			/* character read */
4401590Srgrimes	int	star;			/* '*' flag */
4411590Srgrimes
4421590Srgrimes	for (star = 0; GETC(!=, EOF);)
4431590Srgrimes		switch(c) {
4441590Srgrimes		/* comments don't nest, nor can they be escaped. */
4451590Srgrimes		case '*':
4461590Srgrimes			star = YES;
4471590Srgrimes			break;
4481590Srgrimes		case '/':
44991189Sgshapiro			if (star && t == '*')
4501590Srgrimes				return;
4511590Srgrimes			break;
4521590Srgrimes		case '\n':
45391189Sgshapiro			if (t == '/')
45491189Sgshapiro				return;
4551590Srgrimes			SETLINE;
4561590Srgrimes			/*FALLTHROUGH*/
4571590Srgrimes		default:
4581590Srgrimes			star = NO;
4591590Srgrimes			break;
4601590Srgrimes		}
4611590Srgrimes}
4621590Srgrimes
4631590Srgrimes/*
4641590Srgrimes * skip_string --
4651590Srgrimes *	skip to the end of a string or character constant.
4661590Srgrimes */
4671590Srgrimesvoid
468100822Sdwmaloneskip_string(int key)
4691590Srgrimes{
4701590Srgrimes	int	c,
4711590Srgrimes		skip;
4721590Srgrimes
4731590Srgrimes	for (skip = NO; GETC(!=, EOF); )
4741590Srgrimes		switch (c) {
4751590Srgrimes		case '\\':		/* a backslash escapes anything */
4761590Srgrimes			skip = !skip;	/* we toggle in case it's "\\" */
4771590Srgrimes			break;
4781590Srgrimes		case '\n':
4791590Srgrimes			SETLINE;
4801590Srgrimes			/*FALLTHROUGH*/
4811590Srgrimes		default:
4821590Srgrimes			if (c == key && !skip)
4831590Srgrimes				return;
4841590Srgrimes			skip = NO;
4851590Srgrimes		}
4861590Srgrimes}
4871590Srgrimes
4881590Srgrimes/*
4891590Srgrimes * skip_key --
4901590Srgrimes *	skip to next char "key"
4911590Srgrimes */
4921590Srgrimesint
493100822Sdwmaloneskip_key(int key)
4941590Srgrimes{
4951590Srgrimes	int	c,
4961590Srgrimes		skip,
4971590Srgrimes		retval;
4981590Srgrimes
4991590Srgrimes	for (skip = retval = NO; GETC(!=, EOF);)
5001590Srgrimes		switch(c) {
5011590Srgrimes		case '\\':		/* a backslash escapes anything */
5021590Srgrimes			skip = !skip;	/* we toggle in case it's "\\" */
5031590Srgrimes			break;
5041590Srgrimes		case ';':		/* special case for yacc; if one */
5051590Srgrimes		case '|':		/* of these chars occurs, we may */
5061590Srgrimes			retval = YES;	/* have moved out of the rule */
5071590Srgrimes			break;		/* not used by C */
5081590Srgrimes		case '\'':
5091590Srgrimes		case '"':
5101590Srgrimes			/* skip strings and character constants */
5111590Srgrimes			skip_string(c);
5121590Srgrimes			break;
5131590Srgrimes		case '/':
5141590Srgrimes			/* skip comments */
51591189Sgshapiro			if (GETC(==, '*') || c == '/') {
51691189Sgshapiro				skip_comment(c);
5171590Srgrimes				break;
5181590Srgrimes			}
5191590Srgrimes			(void)ungetc(c, inf);
5201590Srgrimes			c = '/';
5211590Srgrimes			goto norm;
5221590Srgrimes		case '\n':
5231590Srgrimes			SETLINE;
5241590Srgrimes			/*FALLTHROUGH*/
5251590Srgrimes		default:
5261590Srgrimes		norm:
5271590Srgrimes			if (c == key && !skip)
5281590Srgrimes				return (retval);
5291590Srgrimes			skip = NO;
5301590Srgrimes		}
5311590Srgrimes	return (retval);
5321590Srgrimes}
533