C.c revision 1590
1/*
2 * Copyright (c) 1987, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
/*
 * Version identification string for this source file; the declaration
 * is omitted entirely when `lint' is defined.
 */
#ifndef lint
static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
#endif /* not lint */
37
38#include <limits.h>
39#include <stdio.h>
40#include <string.h>
41
42#include "ctags.h"
43
/* Helper routines declared static here; each is defined further down. */
static int	func_entry __P((void));	/* handle a function reference */
static void	hash_entry __P((void));	/* handle a line starting with '#' */
static void	skip_string __P((int));	/* skip a string or char constant */
static int	str_entry __P((int));	/* handle struct, union or enum */
48
49/*
50 * c_entries --
51 *	read .c and .h files and call appropriate routines
52 */
53void
54c_entries()
55{
56	int	c;			/* current character */
57	int	level;			/* brace level */
58	int	token;			/* if reading a token */
59	int	t_def;			/* if reading a typedef */
60	int	t_level;		/* typedef's brace level */
61	char	*sp;			/* buffer pointer */
62	char	tok[MAXTOKEN];		/* token buffer */
63
64	lineftell = ftell(inf);
65	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
66	while (GETC(!=, EOF)) {
67		switch (c) {
68		/*
69		 * Here's where it DOESN'T handle: {
70		 *	foo(a)
71		 *	{
72		 *	#ifdef notdef
73		 *		}
74		 *	#endif
75		 *		if (a)
76		 *			puts("hello, world");
77		 *	}
78		 */
79		case '{':
80			++level;
81			goto endtok;
82		case '}':
83			/*
84			 * if level goes below zero, try and fix
85			 * it, even though we've already messed up
86			 */
87			if (--level < 0)
88				level = 0;
89			goto endtok;
90
91		case '\n':
92			SETLINE;
93			/*
94			 * the above 3 cases are similar in that they
95			 * are special characters that also end tokens.
96			 */
97	endtok:			if (sp > tok) {
98				*sp = EOS;
99				token = YES;
100				sp = tok;
101			}
102			else
103				token = NO;
104			continue;
105
106		/*
107		 * We ignore quoted strings and character constants
108		 * completely.
109		 */
110		case '"':
111		case '\'':
112			(void)skip_string(c);
113			break;
114
115		/*
116		 * comments can be fun; note the state is unchanged after
117		 * return, in case we found:
118		 *	"foo() XX comment XX { int bar; }"
119		 */
120		case '/':
121			if (GETC(==, '*')) {
122				skip_comment();
123				continue;
124			}
125			(void)ungetc(c, inf);
126			c = '/';
127			goto storec;
128
129		/* hash marks flag #define's. */
130		case '#':
131			if (sp == tok) {
132				hash_entry();
133				break;
134			}
135			goto storec;
136
137		/*
138		 * if we have a current token, parenthesis on
139		 * level zero indicates a function.
140		 */
141		case '(':
142			if (!level && token) {
143				int	curline;
144
145				if (sp != tok)
146					*sp = EOS;
147				/*
148				 * grab the line immediately, we may
149				 * already be wrong, for example,
150				 *	foo\n
151				 *	(arg1,
152				 */
153				getline();
154				curline = lineno;
155				if (func_entry()) {
156					++level;
157					pfnote(tok, curline);
158				}
159				break;
160			}
161			goto storec;
162
163		/*
164		 * semi-colons indicate the end of a typedef; if we find a
165		 * typedef we search for the next semi-colon of the same
166		 * level as the typedef.  Ignoring "structs", they are
167		 * tricky, since you can find:
168		 *
169		 *	"typedef long time_t;"
170		 *	"typedef unsigned int u_int;"
171		 *	"typedef unsigned int u_int [10];"
172		 *
173		 * If looking at a typedef, we save a copy of the last token
174		 * found.  Then, when we find the ';' we take the current
175		 * token if it starts with a valid token name, else we take
176		 * the one we saved.  There's probably some reasonable
177		 * alternative to this...
178		 */
179		case ';':
180			if (t_def && level == t_level) {
181				t_def = NO;
182				getline();
183				if (sp != tok)
184					*sp = EOS;
185				pfnote(tok, lineno);
186				break;
187			}
188			goto storec;
189
190		/*
191		 * store characters until one that can't be part of a token
192		 * comes along; check the current token against certain
193		 * reserved words.
194		 */
195		default:
196	storec:		if (!intoken(c)) {
197				if (sp == tok)
198					break;
199				*sp = EOS;
200				if (tflag) {
201					/* no typedefs inside typedefs */
202					if (!t_def &&
203						   !memcmp(tok, "typedef",8)) {
204						t_def = YES;
205						t_level = level;
206						break;
207					}
208					/* catch "typedef struct" */
209					if ((!t_def || t_level < level)
210					    && (!memcmp(tok, "struct", 7)
211					    || !memcmp(tok, "union", 6)
212					    || !memcmp(tok, "enum", 5))) {
213						/*
214						 * get line immediately;
215						 * may change before '{'
216						 */
217						getline();
218						if (str_entry(c))
219							++level;
220						break;
221						/* } */
222					}
223				}
224				sp = tok;
225			}
226			else if (sp != tok || begtoken(c)) {
227				*sp++ = c;
228				token = YES;
229			}
230			continue;
231		}
232
233		sp = tok;
234		token = NO;
235	}
236}
237
238/*
239 * func_entry --
240 *	handle a function reference
241 */
242static int
243func_entry()
244{
245	int	c;			/* current character */
246	int	level = 0;		/* for matching '()' */
247
248	/*
249	 * Find the end of the assumed function declaration.
250	 * Note that ANSI C functions can have type definitions so keep
251	 * track of the parentheses nesting level.
252	 */
253	while (GETC(!=, EOF)) {
254		switch (c) {
255		case '\'':
256		case '"':
257			/* skip strings and character constants */
258			skip_string(c);
259			break;
260		case '/':
261			/* skip comments */
262			if (GETC(==, '*'))
263				skip_comment();
264			break;
265		case '(':
266			level++;
267			break;
268		case ')':
269			if (level == 0)
270				goto fnd;
271			level--;
272			break;
273		case '\n':
274			SETLINE;
275		}
276	}
277	return (NO);
278fnd:
279	/*
280	 * we assume that the character after a function's right paren
281	 * is a token character if it's a function and a non-token
282	 * character if it's a declaration.  Comments don't count...
283	 */
284	for (;;) {
285		while (GETC(!=, EOF) && iswhite(c))
286			if (c == '\n')
287				SETLINE;
288		if (intoken(c) || c == '{')
289			break;
290		if (c == '/' && GETC(==, '*'))
291			skip_comment();
292		else {				/* don't ever "read" '/' */
293			(void)ungetc(c, inf);
294			return (NO);
295		}
296	}
297	if (c != '{')
298		(void)skip_key('{');
299	return (YES);
300}
301
302/*
303 * hash_entry --
304 *	handle a line starting with a '#'
305 */
306static void
307hash_entry()
308{
309	int	c;			/* character read */
310	int	curline;		/* line started on */
311	char	*sp;			/* buffer pointer */
312	char	tok[MAXTOKEN];		/* storage buffer */
313
314	curline = lineno;
315	for (sp = tok;;) {		/* get next token */
316		if (GETC(==, EOF))
317			return;
318		if (iswhite(c))
319			break;
320		*sp++ = c;
321	}
322	*sp = EOS;
323	if (memcmp(tok, "define", 6))	/* only interested in #define's */
324		goto skip;
325	for (;;) {			/* this doesn't handle "#define \n" */
326		if (GETC(==, EOF))
327			return;
328		if (!iswhite(c))
329			break;
330	}
331	for (sp = tok;;) {		/* get next token */
332		*sp++ = c;
333		if (GETC(==, EOF))
334			return;
335		/*
336		 * this is where it DOESN'T handle
337		 * "#define \n"
338		 */
339		if (!intoken(c))
340			break;
341	}
342	*sp = EOS;
343	if (dflag || c == '(') {	/* only want macros */
344		getline();
345		pfnote(tok, curline);
346	}
347skip:	if (c == '\n') {		/* get rid of rest of define */
348		SETLINE
349		if (*(sp - 1) != '\\')
350			return;
351	}
352	(void)skip_key('\n');
353}
354
355/*
356 * str_entry --
357 *	handle a struct, union or enum entry
358 */
359static int
360str_entry(c)
361	int	c;			/* current character */
362{
363	int	curline;		/* line started on */
364	char	*sp;			/* buffer pointer */
365	char	tok[LINE_MAX];		/* storage buffer */
366
367	curline = lineno;
368	while (iswhite(c))
369		if (GETC(==, EOF))
370			return (NO);
371	if (c == '{')		/* it was "struct {" */
372		return (YES);
373	for (sp = tok;;) {		/* get next token */
374		*sp++ = c;
375		if (GETC(==, EOF))
376			return (NO);
377		if (!intoken(c))
378			break;
379	}
380	switch (c) {
381		case '{':		/* it was "struct foo{" */
382			--sp;
383			break;
384		case '\n':		/* it was "struct foo\n" */
385			SETLINE;
386			/*FALLTHROUGH*/
387		default:		/* probably "struct foo " */
388			while (GETC(!=, EOF))
389				if (!iswhite(c))
390					break;
391			if (c != '{') {
392				(void)ungetc(c, inf);
393				return (NO);
394			}
395	}
396	*sp = EOS;
397	pfnote(tok, curline);
398	return (YES);
399}
400
401/*
402 * skip_comment --
403 *	skip over comment
404 */
405void
406skip_comment()
407{
408	int	c;			/* character read */
409	int	star;			/* '*' flag */
410
411	for (star = 0; GETC(!=, EOF);)
412		switch(c) {
413		/* comments don't nest, nor can they be escaped. */
414		case '*':
415			star = YES;
416			break;
417		case '/':
418			if (star)
419				return;
420			break;
421		case '\n':
422			SETLINE;
423			/*FALLTHROUGH*/
424		default:
425			star = NO;
426			break;
427		}
428}
429
430/*
431 * skip_string --
432 *	skip to the end of a string or character constant.
433 */
434void
435skip_string(key)
436	int	key;
437{
438	int	c,
439		skip;
440
441	for (skip = NO; GETC(!=, EOF); )
442		switch (c) {
443		case '\\':		/* a backslash escapes anything */
444			skip = !skip;	/* we toggle in case it's "\\" */
445			break;
446		case '\n':
447			SETLINE;
448			/*FALLTHROUGH*/
449		default:
450			if (c == key && !skip)
451				return;
452			skip = NO;
453		}
454}
455
456/*
457 * skip_key --
458 *	skip to next char "key"
459 */
460int
461skip_key(key)
462	int	key;
463{
464	int	c,
465		skip,
466		retval;
467
468	for (skip = retval = NO; GETC(!=, EOF);)
469		switch(c) {
470		case '\\':		/* a backslash escapes anything */
471			skip = !skip;	/* we toggle in case it's "\\" */
472			break;
473		case ';':		/* special case for yacc; if one */
474		case '|':		/* of these chars occurs, we may */
475			retval = YES;	/* have moved out of the rule */
476			break;		/* not used by C */
477		case '\'':
478		case '"':
479			/* skip strings and character constants */
480			skip_string(c);
481			break;
482		case '/':
483			/* skip comments */
484			if (GETC(==, '*')) {
485				skip_comment();
486				break;
487			}
488			(void)ungetc(c, inf);
489			c = '/';
490			goto norm;
491		case '\n':
492			SETLINE;
493			/*FALLTHROUGH*/
494		default:
495		norm:
496			if (c == key && !skip)
497				return (retval);
498			skip = NO;
499		}
500	return (retval);
501}
502