C.c revision 100822
/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
33
34#if 0
35#ifndef lint
36static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
37#endif
38#endif
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: head/usr.bin/ctags/C.c 100822 2002-07-28 15:50:38Z dwmalone $");
42
43#include <limits.h>
44#include <stdio.h>
45#include <string.h>
46
47#include "ctags.h"
48
49static int	func_entry(void);
50static void	hash_entry(void);
51static void	skip_string(int);
52static int	str_entry(int);
53
54/*
55 * c_entries --
56 *	read .c and .h files and call appropriate routines
57 */
58void
59c_entries(void)
60{
61	int	c;			/* current character */
62	int	level;			/* brace level */
63	int	token;			/* if reading a token */
64	int	t_def;			/* if reading a typedef */
65	int	t_level;		/* typedef's brace level */
66	char	*sp;			/* buffer pointer */
67	char	tok[MAXTOKEN];		/* token buffer */
68
69	lineftell = ftell(inf);
70	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
71	while (GETC(!=, EOF)) {
72		switch (c) {
73		/*
74		 * Here's where it DOESN'T handle: {
75		 *	foo(a)
76		 *	{
77		 *	#ifdef notdef
78		 *		}
79		 *	#endif
80		 *		if (a)
81		 *			puts("hello, world");
82		 *	}
83		 */
84		case '{':
85			++level;
86			goto endtok;
87		case '}':
88			/*
89			 * if level goes below zero, try and fix
90			 * it, even though we've already messed up
91			 */
92			if (--level < 0)
93				level = 0;
94			goto endtok;
95
96		case '\n':
97			SETLINE;
98			/*
99			 * the above 3 cases are similar in that they
100			 * are special characters that also end tokens.
101			 */
102	endtok:			if (sp > tok) {
103				*sp = EOS;
104				token = YES;
105				sp = tok;
106			}
107			else
108				token = NO;
109			continue;
110
111		/*
112		 * We ignore quoted strings and character constants
113		 * completely.
114		 */
115		case '"':
116		case '\'':
117			(void)skip_string(c);
118			break;
119
120		/*
121		 * comments can be fun; note the state is unchanged after
122		 * return, in case we found:
123		 *	"foo() XX comment XX { int bar; }"
124		 */
125		case '/':
126			if (GETC(==, '*') || c == '/') {
127				skip_comment(c);
128				continue;
129			}
130			(void)ungetc(c, inf);
131			c = '/';
132			goto storec;
133
134		/* hash marks flag #define's. */
135		case '#':
136			if (sp == tok) {
137				hash_entry();
138				break;
139			}
140			goto storec;
141
142		/*
143		 * if we have a current token, parenthesis on
144		 * level zero indicates a function.
145		 */
146		case '(':
147			if (!level && token) {
148				int	curline;
149
150				if (sp != tok)
151					*sp = EOS;
152				/*
153				 * grab the line immediately, we may
154				 * already be wrong, for example,
155				 *	foo\n
156				 *	(arg1,
157				 */
158				getline();
159				curline = lineno;
160				if (func_entry()) {
161					++level;
162					pfnote(tok, curline);
163				}
164				break;
165			}
166			goto storec;
167
168		/*
169		 * semi-colons indicate the end of a typedef; if we find a
170		 * typedef we search for the next semi-colon of the same
171		 * level as the typedef.  Ignoring "structs", they are
172		 * tricky, since you can find:
173		 *
174		 *	"typedef long time_t;"
175		 *	"typedef unsigned int u_int;"
176		 *	"typedef unsigned int u_int [10];"
177		 *
178		 * If looking at a typedef, we save a copy of the last token
179		 * found.  Then, when we find the ';' we take the current
180		 * token if it starts with a valid token name, else we take
181		 * the one we saved.  There's probably some reasonable
182		 * alternative to this...
183		 */
184		case ';':
185			if (t_def && level == t_level) {
186				t_def = NO;
187				getline();
188				if (sp != tok)
189					*sp = EOS;
190				pfnote(tok, lineno);
191				break;
192			}
193			goto storec;
194
195		/*
196		 * store characters until one that can't be part of a token
197		 * comes along; check the current token against certain
198		 * reserved words.
199		 */
200		default:
201			/* ignore whitespace */
202			if (c == ' ' || c == '\t') {
203				int save = c;
204				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
205					;
206				if (c == EOF)
207					return;
208				(void)ungetc(c, inf);
209				c = save;
210			}
211	storec:		if (!intoken(c)) {
212				if (sp == tok)
213					break;
214				*sp = EOS;
215				if (tflag) {
216					/* no typedefs inside typedefs */
217					if (!t_def &&
218						   !memcmp(tok, "typedef",8)) {
219						t_def = YES;
220						t_level = level;
221						break;
222					}
223					/* catch "typedef struct" */
224					if ((!t_def || t_level < level)
225					    && (!memcmp(tok, "struct", 7)
226					    || !memcmp(tok, "union", 6)
227					    || !memcmp(tok, "enum", 5))) {
228						/*
229						 * get line immediately;
230						 * may change before '{'
231						 */
232						getline();
233						if (str_entry(c))
234							++level;
235						break;
236						/* } */
237					}
238				}
239				sp = tok;
240			}
241			else if (sp != tok || begtoken(c)) {
242				if (sp == tok + sizeof tok - 1)
243					/* Too long -- truncate it */
244					*sp = EOS;
245				else
246					*sp++ = c;
247				token = YES;
248			}
249			continue;
250		}
251
252		sp = tok;
253		token = NO;
254	}
255}
256
257/*
258 * func_entry --
259 *	handle a function reference
260 */
261static int
262func_entry(void)
263{
264	int	c;			/* current character */
265	int	level = 0;		/* for matching '()' */
266
267	/*
268	 * Find the end of the assumed function declaration.
269	 * Note that ANSI C functions can have type definitions so keep
270	 * track of the parentheses nesting level.
271	 */
272	while (GETC(!=, EOF)) {
273		switch (c) {
274		case '\'':
275		case '"':
276			/* skip strings and character constants */
277			skip_string(c);
278			break;
279		case '/':
280			/* skip comments */
281			if (GETC(==, '*') || c == '/')
282				skip_comment(c);
283			break;
284		case '(':
285			level++;
286			break;
287		case ')':
288			if (level == 0)
289				goto fnd;
290			level--;
291			break;
292		case '\n':
293			SETLINE;
294		}
295	}
296	return (NO);
297fnd:
298	/*
299	 * we assume that the character after a function's right paren
300	 * is a token character if it's a function and a non-token
301	 * character if it's a declaration.  Comments don't count...
302	 */
303	for (;;) {
304		while (GETC(!=, EOF) && iswhite(c))
305			if (c == '\n')
306				SETLINE;
307		if (intoken(c) || c == '{')
308			break;
309		if (c == '/' && (GETC(==, '*') || c == '/'))
310			skip_comment(c);
311		else {				/* don't ever "read" '/' */
312			(void)ungetc(c, inf);
313			return (NO);
314		}
315	}
316	if (c != '{')
317		(void)skip_key('{');
318	return (YES);
319}
320
321/*
322 * hash_entry --
323 *	handle a line starting with a '#'
324 */
325static void
326hash_entry(void)
327{
328	int	c;			/* character read */
329	int	curline;		/* line started on */
330	char	*sp;			/* buffer pointer */
331	char	tok[MAXTOKEN];		/* storage buffer */
332
333	/* ignore leading whitespace */
334	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
335		;
336	(void)ungetc(c, inf);
337
338	curline = lineno;
339	for (sp = tok;;) {		/* get next token */
340		if (GETC(==, EOF))
341			return;
342		if (iswhite(c))
343			break;
344		if (sp == tok + sizeof tok - 1)
345			/* Too long -- truncate it */
346			*sp = EOS;
347		else
348			*sp++ = c;
349	}
350	*sp = EOS;
351	if (memcmp(tok, "define", 6))	/* only interested in #define's */
352		goto skip;
353	for (;;) {			/* this doesn't handle "#define \n" */
354		if (GETC(==, EOF))
355			return;
356		if (!iswhite(c))
357			break;
358	}
359	for (sp = tok;;) {		/* get next token */
360		if (sp == tok + sizeof tok - 1)
361			/* Too long -- truncate it */
362			*sp = EOS;
363		else
364			*sp++ = c;
365		if (GETC(==, EOF))
366			return;
367		/*
368		 * this is where it DOESN'T handle
369		 * "#define \n"
370		 */
371		if (!intoken(c))
372			break;
373	}
374	*sp = EOS;
375	if (dflag || c == '(') {	/* only want macros */
376		getline();
377		pfnote(tok, curline);
378	}
379skip:	if (c == '\n') {		/* get rid of rest of define */
380		SETLINE
381		if (*(sp - 1) != '\\')
382			return;
383	}
384	(void)skip_key('\n');
385}
386
387/*
388 * str_entry --
389 *	handle a struct, union or enum entry
390 */
391static int
392str_entry(int c) /* c is current character */
393{
394	int	curline;		/* line started on */
395	char	*sp;			/* buffer pointer */
396	char	tok[LINE_MAX];		/* storage buffer */
397
398	curline = lineno;
399	while (iswhite(c))
400		if (GETC(==, EOF))
401			return (NO);
402	if (c == '{')		/* it was "struct {" */
403		return (YES);
404	for (sp = tok;;) {		/* get next token */
405		if (sp == tok + sizeof tok - 1)
406			/* Too long -- truncate it */
407			*sp = EOS;
408		else
409			*sp++ = c;
410		if (GETC(==, EOF))
411			return (NO);
412		if (!intoken(c))
413			break;
414	}
415	switch (c) {
416		case '{':		/* it was "struct foo{" */
417			--sp;
418			break;
419		case '\n':		/* it was "struct foo\n" */
420			SETLINE;
421			/*FALLTHROUGH*/
422		default:		/* probably "struct foo " */
423			while (GETC(!=, EOF))
424				if (!iswhite(c))
425					break;
426			if (c != '{') {
427				(void)ungetc(c, inf);
428				return (NO);
429			}
430	}
431	*sp = EOS;
432	pfnote(tok, curline);
433	return (YES);
434}
435
436/*
437 * skip_comment --
438 *	skip over comment
439 */
440void
441skip_comment(int t) /* t is comment character */
442{
443	int	c;			/* character read */
444	int	star;			/* '*' flag */
445
446	for (star = 0; GETC(!=, EOF);)
447		switch(c) {
448		/* comments don't nest, nor can they be escaped. */
449		case '*':
450			star = YES;
451			break;
452		case '/':
453			if (star && t == '*')
454				return;
455			break;
456		case '\n':
457			if (t == '/')
458				return;
459			SETLINE;
460			/*FALLTHROUGH*/
461		default:
462			star = NO;
463			break;
464		}
465}
466
467/*
468 * skip_string --
469 *	skip to the end of a string or character constant.
470 */
471void
472skip_string(int key)
473{
474	int	c,
475		skip;
476
477	for (skip = NO; GETC(!=, EOF); )
478		switch (c) {
479		case '\\':		/* a backslash escapes anything */
480			skip = !skip;	/* we toggle in case it's "\\" */
481			break;
482		case '\n':
483			SETLINE;
484			/*FALLTHROUGH*/
485		default:
486			if (c == key && !skip)
487				return;
488			skip = NO;
489		}
490}
491
492/*
493 * skip_key --
494 *	skip to next char "key"
495 */
496int
497skip_key(int key)
498{
499	int	c,
500		skip,
501		retval;
502
503	for (skip = retval = NO; GETC(!=, EOF);)
504		switch(c) {
505		case '\\':		/* a backslash escapes anything */
506			skip = !skip;	/* we toggle in case it's "\\" */
507			break;
508		case ';':		/* special case for yacc; if one */
509		case '|':		/* of these chars occurs, we may */
510			retval = YES;	/* have moved out of the rule */
511			break;		/* not used by C */
512		case '\'':
513		case '"':
514			/* skip strings and character constants */
515			skip_string(c);
516			break;
517		case '/':
518			/* skip comments */
519			if (GETC(==, '*') || c == '/') {
520				skip_comment(c);
521				break;
522			}
523			(void)ungetc(c, inf);
524			c = '/';
525			goto norm;
526		case '\n':
527			SETLINE;
528			/*FALLTHROUGH*/
529		default:
530		norm:
531			if (c == key && !skip)
532				return (retval);
533			skip = NO;
534		}
535	return (retval);
536}
537