C.c revision 91382
1/*
2 * Copyright (c) 1987, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#if 0
35#ifndef lint
36static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
37#endif
38#endif
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 91382 2002-02-27 14:56:58Z dwmalone $");
42
43#include <limits.h>
44#include <stdio.h>
45#include <string.h>
46
47#include "ctags.h"
48
49static int	func_entry __P((void));
50static void	hash_entry __P((void));
51static void	skip_string __P((int));
52static int	str_entry __P((int));
53
54/*
55 * c_entries --
56 *	read .c and .h files and call appropriate routines
57 */
58void
59c_entries()
60{
61	int	c;			/* current character */
62	int	level;			/* brace level */
63	int	token;			/* if reading a token */
64	int	t_def;			/* if reading a typedef */
65	int	t_level;		/* typedef's brace level */
66	char	*sp;			/* buffer pointer */
67	char	tok[MAXTOKEN];		/* token buffer */
68
69	lineftell = ftell(inf);
70	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
71	while (GETC(!=, EOF)) {
72		switch (c) {
73		/*
74		 * Here's where it DOESN'T handle: {
75		 *	foo(a)
76		 *	{
77		 *	#ifdef notdef
78		 *		}
79		 *	#endif
80		 *		if (a)
81		 *			puts("hello, world");
82		 *	}
83		 */
84		case '{':
85			++level;
86			goto endtok;
87		case '}':
88			/*
89			 * if level goes below zero, try and fix
90			 * it, even though we've already messed up
91			 */
92			if (--level < 0)
93				level = 0;
94			goto endtok;
95
96		case '\n':
97			SETLINE;
98			/*
99			 * the above 3 cases are similar in that they
100			 * are special characters that also end tokens.
101			 */
102	endtok:			if (sp > tok) {
103				*sp = EOS;
104				token = YES;
105				sp = tok;
106			}
107			else
108				token = NO;
109			continue;
110
111		/*
112		 * We ignore quoted strings and character constants
113		 * completely.
114		 */
115		case '"':
116		case '\'':
117			(void)skip_string(c);
118			break;
119
120		/*
121		 * comments can be fun; note the state is unchanged after
122		 * return, in case we found:
123		 *	"foo() XX comment XX { int bar; }"
124		 */
125		case '/':
126			if (GETC(==, '*') || c == '/') {
127				skip_comment(c);
128				continue;
129			}
130			(void)ungetc(c, inf);
131			c = '/';
132			goto storec;
133
134		/* hash marks flag #define's. */
135		case '#':
136			if (sp == tok) {
137				hash_entry();
138				break;
139			}
140			goto storec;
141
142		/*
143		 * if we have a current token, parenthesis on
144		 * level zero indicates a function.
145		 */
146		case '(':
147			if (!level && token) {
148				int	curline;
149
150				if (sp != tok)
151					*sp = EOS;
152				/*
153				 * grab the line immediately, we may
154				 * already be wrong, for example,
155				 *	foo\n
156				 *	(arg1,
157				 */
158				getline();
159				curline = lineno;
160				if (func_entry()) {
161					++level;
162					pfnote(tok, curline);
163				}
164				break;
165			}
166			goto storec;
167
168		/*
169		 * semi-colons indicate the end of a typedef; if we find a
170		 * typedef we search for the next semi-colon of the same
171		 * level as the typedef.  Ignoring "structs", they are
172		 * tricky, since you can find:
173		 *
174		 *	"typedef long time_t;"
175		 *	"typedef unsigned int u_int;"
176		 *	"typedef unsigned int u_int [10];"
177		 *
178		 * If looking at a typedef, we save a copy of the last token
179		 * found.  Then, when we find the ';' we take the current
180		 * token if it starts with a valid token name, else we take
181		 * the one we saved.  There's probably some reasonable
182		 * alternative to this...
183		 */
184		case ';':
185			if (t_def && level == t_level) {
186				t_def = NO;
187				getline();
188				if (sp != tok)
189					*sp = EOS;
190				pfnote(tok, lineno);
191				break;
192			}
193			goto storec;
194
195		/*
196		 * store characters until one that can't be part of a token
197		 * comes along; check the current token against certain
198		 * reserved words.
199		 */
200		default:
201			/* ignore whitespace */
202			if (c == ' ' || c == '\t') {
203				int save = c;
204				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
205					;
206				if (c == EOF)
207					return;
208				(void)ungetc(c, inf);
209				c = save;
210			}
211	storec:		if (!intoken(c)) {
212				if (sp == tok)
213					break;
214				*sp = EOS;
215				if (tflag) {
216					/* no typedefs inside typedefs */
217					if (!t_def &&
218						   !memcmp(tok, "typedef",8)) {
219						t_def = YES;
220						t_level = level;
221						break;
222					}
223					/* catch "typedef struct" */
224					if ((!t_def || t_level < level)
225					    && (!memcmp(tok, "struct", 7)
226					    || !memcmp(tok, "union", 6)
227					    || !memcmp(tok, "enum", 5))) {
228						/*
229						 * get line immediately;
230						 * may change before '{'
231						 */
232						getline();
233						if (str_entry(c))
234							++level;
235						break;
236						/* } */
237					}
238				}
239				sp = tok;
240			}
241			else if (sp != tok || begtoken(c)) {
242				*sp++ = c;
243				token = YES;
244			}
245			continue;
246		}
247
248		sp = tok;
249		token = NO;
250	}
251}
252
253/*
254 * func_entry --
255 *	handle a function reference
256 */
257static int
258func_entry()
259{
260	int	c;			/* current character */
261	int	level = 0;		/* for matching '()' */
262
263	/*
264	 * Find the end of the assumed function declaration.
265	 * Note that ANSI C functions can have type definitions so keep
266	 * track of the parentheses nesting level.
267	 */
268	while (GETC(!=, EOF)) {
269		switch (c) {
270		case '\'':
271		case '"':
272			/* skip strings and character constants */
273			skip_string(c);
274			break;
275		case '/':
276			/* skip comments */
277			if (GETC(==, '*') || c == '/')
278				skip_comment(c);
279			break;
280		case '(':
281			level++;
282			break;
283		case ')':
284			if (level == 0)
285				goto fnd;
286			level--;
287			break;
288		case '\n':
289			SETLINE;
290		}
291	}
292	return (NO);
293fnd:
294	/*
295	 * we assume that the character after a function's right paren
296	 * is a token character if it's a function and a non-token
297	 * character if it's a declaration.  Comments don't count...
298	 */
299	for (;;) {
300		while (GETC(!=, EOF) && iswhite(c))
301			if (c == '\n')
302				SETLINE;
303		if (intoken(c) || c == '{')
304			break;
305		if (c == '/' && (GETC(==, '*') || c == '/'))
306			skip_comment(c);
307		else {				/* don't ever "read" '/' */
308			(void)ungetc(c, inf);
309			return (NO);
310		}
311	}
312	if (c != '{')
313		(void)skip_key('{');
314	return (YES);
315}
316
317/*
318 * hash_entry --
319 *	handle a line starting with a '#'
320 */
321static void
322hash_entry()
323{
324	int	c;			/* character read */
325	int	curline;		/* line started on */
326	char	*sp;			/* buffer pointer */
327	char	tok[MAXTOKEN];		/* storage buffer */
328
329	/* ignore leading whitespace */
330	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
331		;
332	(void)ungetc(c, inf);
333
334	curline = lineno;
335	for (sp = tok;;) {		/* get next token */
336		if (GETC(==, EOF))
337			return;
338		if (iswhite(c))
339			break;
340		*sp++ = c;
341	}
342	*sp = EOS;
343	if (memcmp(tok, "define", 6))	/* only interested in #define's */
344		goto skip;
345	for (;;) {			/* this doesn't handle "#define \n" */
346		if (GETC(==, EOF))
347			return;
348		if (!iswhite(c))
349			break;
350	}
351	for (sp = tok;;) {		/* get next token */
352		*sp++ = c;
353		if (GETC(==, EOF))
354			return;
355		/*
356		 * this is where it DOESN'T handle
357		 * "#define \n"
358		 */
359		if (!intoken(c))
360			break;
361	}
362	*sp = EOS;
363	if (dflag || c == '(') {	/* only want macros */
364		getline();
365		pfnote(tok, curline);
366	}
367skip:	if (c == '\n') {		/* get rid of rest of define */
368		SETLINE
369		if (*(sp - 1) != '\\')
370			return;
371	}
372	(void)skip_key('\n');
373}
374
375/*
376 * str_entry --
377 *	handle a struct, union or enum entry
378 */
379static int
380str_entry(c)
381	int	c;			/* current character */
382{
383	int	curline;		/* line started on */
384	char	*sp;			/* buffer pointer */
385	char	tok[LINE_MAX];		/* storage buffer */
386
387	curline = lineno;
388	while (iswhite(c))
389		if (GETC(==, EOF))
390			return (NO);
391	if (c == '{')		/* it was "struct {" */
392		return (YES);
393	for (sp = tok;;) {		/* get next token */
394		*sp++ = c;
395		if (GETC(==, EOF))
396			return (NO);
397		if (!intoken(c))
398			break;
399	}
400	switch (c) {
401		case '{':		/* it was "struct foo{" */
402			--sp;
403			break;
404		case '\n':		/* it was "struct foo\n" */
405			SETLINE;
406			/*FALLTHROUGH*/
407		default:		/* probably "struct foo " */
408			while (GETC(!=, EOF))
409				if (!iswhite(c))
410					break;
411			if (c != '{') {
412				(void)ungetc(c, inf);
413				return (NO);
414			}
415	}
416	*sp = EOS;
417	pfnote(tok, curline);
418	return (YES);
419}
420
421/*
422 * skip_comment --
423 *	skip over comment
424 */
425void
426skip_comment(t)
427	int	t;			/* comment character */
428{
429	int	c;			/* character read */
430	int	star;			/* '*' flag */
431
432	for (star = 0; GETC(!=, EOF);)
433		switch(c) {
434		/* comments don't nest, nor can they be escaped. */
435		case '*':
436			star = YES;
437			break;
438		case '/':
439			if (star && t == '*')
440				return;
441			break;
442		case '\n':
443			if (t == '/')
444				return;
445			SETLINE;
446			/*FALLTHROUGH*/
447		default:
448			star = NO;
449			break;
450		}
451}
452
453/*
454 * skip_string --
455 *	skip to the end of a string or character constant.
456 */
457void
458skip_string(key)
459	int	key;
460{
461	int	c,
462		skip;
463
464	for (skip = NO; GETC(!=, EOF); )
465		switch (c) {
466		case '\\':		/* a backslash escapes anything */
467			skip = !skip;	/* we toggle in case it's "\\" */
468			break;
469		case '\n':
470			SETLINE;
471			/*FALLTHROUGH*/
472		default:
473			if (c == key && !skip)
474				return;
475			skip = NO;
476		}
477}
478
479/*
480 * skip_key --
481 *	skip to next char "key"
482 */
483int
484skip_key(key)
485	int	key;
486{
487	int	c,
488		skip,
489		retval;
490
491	for (skip = retval = NO; GETC(!=, EOF);)
492		switch(c) {
493		case '\\':		/* a backslash escapes anything */
494			skip = !skip;	/* we toggle in case it's "\\" */
495			break;
496		case ';':		/* special case for yacc; if one */
497		case '|':		/* of these chars occurs, we may */
498			retval = YES;	/* have moved out of the rule */
499			break;		/* not used by C */
500		case '\'':
501		case '"':
502			/* skip strings and character constants */
503			skip_string(c);
504			break;
505		case '/':
506			/* skip comments */
507			if (GETC(==, '*') || c == '/') {
508				skip_comment(c);
509				break;
510			}
511			(void)ungetc(c, inf);
512			c = '/';
513			goto norm;
514		case '\n':
515			SETLINE;
516			/*FALLTHROUGH*/
517		default:
518		norm:
519			if (c == key && !skip)
520				return (retval);
521			skip = NO;
522		}
523	return (retval);
524}
525