C.c revision 87750
1/*
2 * Copyright (c) 1987, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#if 0
35#ifndef lint
36static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
37#endif
38#endif
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 87750 2001-12-12 18:24:42Z charnier $");
42
#include <limits.h>
#include <stdio.h>
#include <string.h>
45
46#include "ctags.h"
47
48static int	func_entry __P((void));
49static void	hash_entry __P((void));
50static void	skip_string __P((int));
51static int	str_entry __P((int));
52
53/*
54 * c_entries --
55 *	read .c and .h files and call appropriate routines
56 */
57void
58c_entries()
59{
60	int	c;			/* current character */
61	int	level;			/* brace level */
62	int	token;			/* if reading a token */
63	int	t_def;			/* if reading a typedef */
64	int	t_level;		/* typedef's brace level */
65	char	*sp;			/* buffer pointer */
66	char	tok[MAXTOKEN];		/* token buffer */
67
68	lineftell = ftell(inf);
69	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
70	while (GETC(!=, EOF)) {
71		switch (c) {
72		/*
73		 * Here's where it DOESN'T handle: {
74		 *	foo(a)
75		 *	{
76		 *	#ifdef notdef
77		 *		}
78		 *	#endif
79		 *		if (a)
80		 *			puts("hello, world");
81		 *	}
82		 */
83		case '{':
84			++level;
85			goto endtok;
86		case '}':
87			/*
88			 * if level goes below zero, try and fix
89			 * it, even though we've already messed up
90			 */
91			if (--level < 0)
92				level = 0;
93			goto endtok;
94
95		case '\n':
96			SETLINE;
97			/*
98			 * the above 3 cases are similar in that they
99			 * are special characters that also end tokens.
100			 */
101	endtok:			if (sp > tok) {
102				*sp = EOS;
103				token = YES;
104				sp = tok;
105			}
106			else
107				token = NO;
108			continue;
109
110		/*
111		 * We ignore quoted strings and character constants
112		 * completely.
113		 */
114		case '"':
115		case '\'':
116			(void)skip_string(c);
117			break;
118
119		/*
120		 * comments can be fun; note the state is unchanged after
121		 * return, in case we found:
122		 *	"foo() XX comment XX { int bar; }"
123		 */
124		case '/':
125			if (GETC(==, '*')) {
126				skip_comment();
127				continue;
128			}
129			(void)ungetc(c, inf);
130			c = '/';
131			goto storec;
132
133		/* hash marks flag #define's. */
134		case '#':
135			if (sp == tok) {
136				hash_entry();
137				break;
138			}
139			goto storec;
140
141		/*
142		 * if we have a current token, parenthesis on
143		 * level zero indicates a function.
144		 */
145		case '(':
146			if (!level && token) {
147				int	curline;
148
149				if (sp != tok)
150					*sp = EOS;
151				/*
152				 * grab the line immediately, we may
153				 * already be wrong, for example,
154				 *	foo\n
155				 *	(arg1,
156				 */
157				getline();
158				curline = lineno;
159				if (func_entry()) {
160					++level;
161					pfnote(tok, curline);
162				}
163				break;
164			}
165			goto storec;
166
167		/*
168		 * semi-colons indicate the end of a typedef; if we find a
169		 * typedef we search for the next semi-colon of the same
170		 * level as the typedef.  Ignoring "structs", they are
171		 * tricky, since you can find:
172		 *
173		 *	"typedef long time_t;"
174		 *	"typedef unsigned int u_int;"
175		 *	"typedef unsigned int u_int [10];"
176		 *
177		 * If looking at a typedef, we save a copy of the last token
178		 * found.  Then, when we find the ';' we take the current
179		 * token if it starts with a valid token name, else we take
180		 * the one we saved.  There's probably some reasonable
181		 * alternative to this...
182		 */
183		case ';':
184			if (t_def && level == t_level) {
185				t_def = NO;
186				getline();
187				if (sp != tok)
188					*sp = EOS;
189				pfnote(tok, lineno);
190				break;
191			}
192			goto storec;
193
194		/*
195		 * store characters until one that can't be part of a token
196		 * comes along; check the current token against certain
197		 * reserved words.
198		 */
199		default:
200			/* ignore whitespace */
201			if (c == ' ' || c == '\t') {
202				int save = c;
203				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
204					;
205				if (c == EOF)
206					return;
207				(void)ungetc(c, inf);
208				c = save;
209			}
210	storec:		if (!intoken(c)) {
211				if (sp == tok)
212					break;
213				*sp = EOS;
214				if (tflag) {
215					/* no typedefs inside typedefs */
216					if (!t_def &&
217						   !memcmp(tok, "typedef",8)) {
218						t_def = YES;
219						t_level = level;
220						break;
221					}
222					/* catch "typedef struct" */
223					if ((!t_def || t_level < level)
224					    && (!memcmp(tok, "struct", 7)
225					    || !memcmp(tok, "union", 6)
226					    || !memcmp(tok, "enum", 5))) {
227						/*
228						 * get line immediately;
229						 * may change before '{'
230						 */
231						getline();
232						if (str_entry(c))
233							++level;
234						break;
235						/* } */
236					}
237				}
238				sp = tok;
239			}
240			else if (sp != tok || begtoken(c)) {
241				*sp++ = c;
242				token = YES;
243			}
244			continue;
245		}
246
247		sp = tok;
248		token = NO;
249	}
250}
251
252/*
253 * func_entry --
254 *	handle a function reference
255 */
256static int
257func_entry()
258{
259	int	c;			/* current character */
260	int	level = 0;		/* for matching '()' */
261
262	/*
263	 * Find the end of the assumed function declaration.
264	 * Note that ANSI C functions can have type definitions so keep
265	 * track of the parentheses nesting level.
266	 */
267	while (GETC(!=, EOF)) {
268		switch (c) {
269		case '\'':
270		case '"':
271			/* skip strings and character constants */
272			skip_string(c);
273			break;
274		case '/':
275			/* skip comments */
276			if (GETC(==, '*'))
277				skip_comment();
278			break;
279		case '(':
280			level++;
281			break;
282		case ')':
283			if (level == 0)
284				goto fnd;
285			level--;
286			break;
287		case '\n':
288			SETLINE;
289		}
290	}
291	return (NO);
292fnd:
293	/*
294	 * we assume that the character after a function's right paren
295	 * is a token character if it's a function and a non-token
296	 * character if it's a declaration.  Comments don't count...
297	 */
298	for (;;) {
299		while (GETC(!=, EOF) && iswhite(c))
300			if (c == '\n')
301				SETLINE;
302		if (intoken(c) || c == '{')
303			break;
304		if (c == '/' && GETC(==, '*'))
305			skip_comment();
306		else {				/* don't ever "read" '/' */
307			(void)ungetc(c, inf);
308			return (NO);
309		}
310	}
311	if (c != '{')
312		(void)skip_key('{');
313	return (YES);
314}
315
316/*
317 * hash_entry --
318 *	handle a line starting with a '#'
319 */
320static void
321hash_entry()
322{
323	int	c;			/* character read */
324	int	curline;		/* line started on */
325	char	*sp;			/* buffer pointer */
326	char	tok[MAXTOKEN];		/* storage buffer */
327
328	/* ignore leading whitespace */
329	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
330		;
331	(void)ungetc(c, inf);
332
333	curline = lineno;
334	for (sp = tok;;) {		/* get next token */
335		if (GETC(==, EOF))
336			return;
337		if (iswhite(c))
338			break;
339		*sp++ = c;
340	}
341	*sp = EOS;
342	if (memcmp(tok, "define", 6))	/* only interested in #define's */
343		goto skip;
344	for (;;) {			/* this doesn't handle "#define \n" */
345		if (GETC(==, EOF))
346			return;
347		if (!iswhite(c))
348			break;
349	}
350	for (sp = tok;;) {		/* get next token */
351		*sp++ = c;
352		if (GETC(==, EOF))
353			return;
354		/*
355		 * this is where it DOESN'T handle
356		 * "#define \n"
357		 */
358		if (!intoken(c))
359			break;
360	}
361	*sp = EOS;
362	if (dflag || c == '(') {	/* only want macros */
363		getline();
364		pfnote(tok, curline);
365	}
366skip:	if (c == '\n') {		/* get rid of rest of define */
367		SETLINE
368		if (*(sp - 1) != '\\')
369			return;
370	}
371	(void)skip_key('\n');
372}
373
374/*
375 * str_entry --
376 *	handle a struct, union or enum entry
377 */
378static int
379str_entry(c)
380	int	c;			/* current character */
381{
382	int	curline;		/* line started on */
383	char	*sp;			/* buffer pointer */
384	char	tok[LINE_MAX];		/* storage buffer */
385
386	curline = lineno;
387	while (iswhite(c))
388		if (GETC(==, EOF))
389			return (NO);
390	if (c == '{')		/* it was "struct {" */
391		return (YES);
392	for (sp = tok;;) {		/* get next token */
393		*sp++ = c;
394		if (GETC(==, EOF))
395			return (NO);
396		if (!intoken(c))
397			break;
398	}
399	switch (c) {
400		case '{':		/* it was "struct foo{" */
401			--sp;
402			break;
403		case '\n':		/* it was "struct foo\n" */
404			SETLINE;
405			/*FALLTHROUGH*/
406		default:		/* probably "struct foo " */
407			while (GETC(!=, EOF))
408				if (!iswhite(c))
409					break;
410			if (c != '{') {
411				(void)ungetc(c, inf);
412				return (NO);
413			}
414	}
415	*sp = EOS;
416	pfnote(tok, curline);
417	return (YES);
418}
419
420/*
421 * skip_comment --
422 *	skip over comment
423 */
424void
425skip_comment()
426{
427	int	c;			/* character read */
428	int	star;			/* '*' flag */
429
430	for (star = 0; GETC(!=, EOF);)
431		switch(c) {
432		/* comments don't nest, nor can they be escaped. */
433		case '*':
434			star = YES;
435			break;
436		case '/':
437			if (star)
438				return;
439			break;
440		case '\n':
441			SETLINE;
442			/*FALLTHROUGH*/
443		default:
444			star = NO;
445			break;
446		}
447}
448
449/*
450 * skip_string --
451 *	skip to the end of a string or character constant.
452 */
453void
454skip_string(key)
455	int	key;
456{
457	int	c,
458		skip;
459
460	for (skip = NO; GETC(!=, EOF); )
461		switch (c) {
462		case '\\':		/* a backslash escapes anything */
463			skip = !skip;	/* we toggle in case it's "\\" */
464			break;
465		case '\n':
466			SETLINE;
467			/*FALLTHROUGH*/
468		default:
469			if (c == key && !skip)
470				return;
471			skip = NO;
472		}
473}
474
475/*
476 * skip_key --
477 *	skip to next char "key"
478 */
479int
480skip_key(key)
481	int	key;
482{
483	int	c,
484		skip,
485		retval;
486
487	for (skip = retval = NO; GETC(!=, EOF);)
488		switch(c) {
489		case '\\':		/* a backslash escapes anything */
490			skip = !skip;	/* we toggle in case it's "\\" */
491			break;
492		case ';':		/* special case for yacc; if one */
493		case '|':		/* of these chars occurs, we may */
494			retval = YES;	/* have moved out of the rule */
495			break;		/* not used by C */
496		case '\'':
497		case '"':
498			/* skip strings and character constants */
499			skip_string(c);
500			break;
501		case '/':
502			/* skip comments */
503			if (GETC(==, '*')) {
504				skip_comment();
505				break;
506			}
507			(void)ungetc(c, inf);
508			c = '/';
509			goto norm;
510		case '\n':
511			SETLINE;
512			/*FALLTHROUGH*/
513		default:
514		norm:
515			if (c == key && !skip)
516				return (retval);
517			skip = NO;
518		}
519	return (retval);
520}
521