1/*	$NetBSD: C.c,v 1.18 2009/04/11 12:58:03 lukem Exp $	*/
2
3/*
4 * Copyright (c) 1987, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#if HAVE_NBTOOL_CONFIG_H
33#include "nbtool_config.h"
34#endif
35
36#include <sys/cdefs.h>
37#if defined(__RCSID) && !defined(lint)
38#if 0
39static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
40#else
41__RCSID("$NetBSD: C.c,v 1.18 2009/04/11 12:58:03 lukem Exp $");
42#endif
43#endif /* not lint */
44
45#include <limits.h>
46#include <stddef.h>
47#include <stdio.h>
48#include <string.h>
49
50#include "ctags.h"
51
/*
 * Forward declarations for the helpers defined later in this file.
 * All four are declared with internal linkage here.
 */
static int	func_entry(void);
static void	hash_entry(void);
static void	skip_string(int);
static int	str_entry(int);
56
57/*
58 * c_entries --
59 *	read .c and .h files and call appropriate routines
60 */
61void
62c_entries(void)
63{
64	int	c;			/* current character */
65	int	level;			/* brace level */
66	int	token;			/* if reading a token */
67	int	t_def;			/* if reading a typedef */
68	int	t_level;		/* typedef's brace level */
69	char	*sp;			/* buffer pointer */
70	char	tok[MAXTOKEN];		/* token buffer */
71
72	lineftell = ftell(inf);
73	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
74	while (GETC(!=, EOF)) {
75		switch (c) {
76		/*
77		 * Here's where it DOESN'T handle: {
78		 *	foo(a)
79		 *	{
80		 *	#ifdef notdef
81		 *		}
82		 *	#endif
83		 *		if (a)
84		 *			puts("hello, world");
85		 *	}
86		 */
87		case '{':
88			++level;
89			goto endtok;
90		case '}':
91			/*
92			 * if level goes below zero, try and fix
93			 * it, even though we've already messed up
94			 */
95			if (--level < 0)
96				level = 0;
97			goto endtok;
98
99		case '\n':
100			SETLINE;
101			/*
102			 * the above 3 cases are similar in that they
103			 * are special characters that also end tokens.
104			 */
105	endtok:			if (sp > tok) {
106				*sp = EOS;
107				token = YES;
108				sp = tok;
109			}
110			else
111				token = NO;
112			continue;
113
114		/*
115		 * We ignore quoted strings and character constants
116		 * completely.
117		 */
118		case '"':
119		case '\'':
120			(void)skip_string(c);
121			break;
122
123		/*
124		 * comments can be fun; note the state is unchanged after
125		 * return, in case we found:
126		 *	"foo() XX comment XX { int bar; }"
127		 */
128		case '/':
129			if (GETC(==, '*')) {
130				skip_comment(c);
131				continue;
132			} else if (c == '/') {
133				skip_comment(c);
134				continue;
135			}
136			(void)ungetc(c, inf);
137			c = '/';
138			goto storec;
139
140		/* hash marks flag #define's. */
141		case '#':
142			if (sp == tok) {
143				hash_entry();
144				break;
145			}
146			goto storec;
147
148		/*
149		 * if we have a current token, parenthesis on
150		 * level zero indicates a function.
151		 */
152		case '(':
153			do c = getc(inf);
154			while (c != EOF && iswhite(c));
155			if (c == '*')
156				break;
157			if (c != EOF)
158				ungetc(c, inf);
159			if (!level && token) {
160				int	curline;
161
162				if (sp != tok)
163					*sp = EOS;
164				/*
165				 * grab the line immediately, we may
166				 * already be wrong, for example,
167				 *	foo\n
168				 *	(arg1,
169				 */
170				get_line();
171				curline = lineno;
172				if (func_entry()) {
173					++level;
174					pfnote(tok, curline);
175				}
176				break;
177			}
178			goto storec;
179
180		/*
181		 * semi-colons indicate the end of a typedef; if we find a
182		 * typedef we search for the next semi-colon of the same
183		 * level as the typedef.  Ignoring "structs", they are
184		 * tricky, since you can find:
185		 *
186		 *	"typedef long time_t;"
187		 *	"typedef unsigned int u_int;"
188		 *	"typedef unsigned int u_int [10];"
189		 *
190		 * If looking at a typedef, we save a copy of the last token
191		 * found.  Then, when we find the ';' we take the current
192		 * token if it starts with a valid token name, else we take
193		 * the one we saved.  There's probably some reasonable
194		 * alternative to this...
195		 */
196		case ';':
197			if (t_def && level == t_level) {
198				t_def = NO;
199				get_line();
200				if (sp != tok)
201					*sp = EOS;
202				pfnote(tok, lineno);
203				break;
204			}
205			goto storec;
206
207		/*
208		 * store characters until one that can't be part of a token
209		 * comes along; check the current token against certain
210		 * reserved words.
211		 */
212		default:
213	storec:		if (c == EOF)
214				break;
215			if (!intoken(c)) {
216				if (sp == tok)
217					break;
218				*sp = EOS;
219				if (tflag) {
220					/* no typedefs inside typedefs */
221					if (!t_def &&
222						   !memcmp(tok, "typedef",8)) {
223						t_def = YES;
224						t_level = level;
225						break;
226					}
227					/* catch "typedef struct" */
228					if ((!t_def || t_level <= level)
229					    && (!memcmp(tok, "struct", 7)
230					    || !memcmp(tok, "union", 6)
231					    || !memcmp(tok, "enum", 5))) {
232						/*
233						 * get line immediately;
234						 * may change before '{'
235						 */
236						get_line();
237						if (str_entry(c))
238							++level;
239						break;
240						/* } */
241					}
242				}
243				sp = tok;
244			}
245			else if (sp != tok || begtoken(c)) {
246				if (sp < tok + sizeof tok)
247					*sp++ = c;
248				token = YES;
249			}
250			continue;
251		}
252
253		sp = tok;
254		token = NO;
255	}
256}
257
258/*
259 * func_entry --
260 *	handle a function reference
261 */
262static int
263func_entry(void)
264{
265	int	c;			/* current character */
266	int	level = 0;		/* for matching '()' */
267	static char attribute[] = "__attribute__";
268	char	maybe_attribute[sizeof attribute + 1],
269		*anext;
270
271	/*
272	 * Find the end of the assumed function declaration.
273	 * Note that ANSI C functions can have type definitions so keep
274	 * track of the parentheses nesting level.
275	 */
276	while (GETC(!=, EOF)) {
277		switch (c) {
278		case '\'':
279		case '"':
280			/* skip strings and character constants */
281			skip_string(c);
282			break;
283		case '/':
284			/* skip comments */
285			if (GETC(==, '*'))
286				skip_comment(c);
287			else if (c == '/')
288				skip_comment(c);
289			break;
290		case '(':
291			level++;
292			break;
293		case ')':
294			if (level == 0)
295				goto fnd;
296			level--;
297			break;
298		case '\n':
299			SETLINE;
300		}
301	}
302	return (NO);
303fnd:
304	/*
305	 * we assume that the character after a function's right paren
306	 * is a token character if it's a function and a non-token
307	 * character if it's a declaration.  Comments don't count...
308	 */
309	for (anext = maybe_attribute;;) {
310		while (GETC(!=, EOF) && iswhite(c))
311			if (c == '\n')
312				SETLINE;
313		if (c == EOF)
314			return NO;
315		/*
316		 * Recognize the gnu __attribute__ extension, which would
317		 * otherwise make the heuristic test DTWT
318		 */
319		if (anext == maybe_attribute) {
320			if (intoken(c)) {
321				*anext++ = c;
322				continue;
323			}
324		} else {
325			if (intoken(c)) {
326				if (anext - maybe_attribute
327				 < (ptrdiff_t)(sizeof attribute - 1))
328					*anext++ = c;
329				else	break;
330				continue;
331			} else {
332				*anext++ = '\0';
333				if (strcmp(maybe_attribute, attribute) == 0) {
334					(void)ungetc(c, inf);
335					return NO;
336				}
337				break;
338			}
339		}
340		if (intoken(c) || c == '{')
341			break;
342		if (c == '/' && GETC(==, '*'))
343			skip_comment(c);
344		else if (c == '/')
345			skip_comment(c);
346		else {				/* don't ever "read" '/' */
347			(void)ungetc(c, inf);
348			return (NO);
349		}
350	}
351	if (c != '{')
352		(void)skip_key('{');
353	return (YES);
354}
355
356/*
357 * hash_entry --
358 *	handle a line starting with a '#'
359 */
360static void
361hash_entry(void)
362{
363	int	c;			/* character read */
364	int	curline;		/* line started on */
365	char	*sp;			/* buffer pointer */
366	char	tok[MAXTOKEN];		/* storage buffer */
367
368	curline = lineno;
369	do if (GETC(==, EOF))
370		return;
371	while(c != '\n' && iswhite(c));
372	ungetc(c, inf);
373	for (sp = tok;;) {		/* get next token */
374		if (GETC(==, EOF))
375			return;
376		if (iswhite(c))
377			break;
378		if (sp < tok + sizeof tok)
379			*sp++ = c;
380	}
381	if(sp >= tok + sizeof tok)
382		--sp;
383	*sp = EOS;
384	if (memcmp(tok, "define", 6))	/* only interested in #define's */
385		goto skip;
386	for (;;) {			/* this doesn't handle "#define \n" */
387		if (GETC(==, EOF))
388			return;
389		if (!iswhite(c))
390			break;
391	}
392	for (sp = tok;;) {		/* get next token */
393		if(sp < tok + sizeof tok)
394			*sp++ = c;
395		if (GETC(==, EOF))
396			return;
397		/*
398		 * this is where it DOESN'T handle
399		 * "#define \n"
400		 */
401		if (!intoken(c))
402			break;
403	}
404	if(sp >= tok + sizeof tok)
405		--sp;
406	*sp = EOS;
407	if (dflag || c == '(') {	/* only want macros */
408		get_line();
409		pfnote(tok, curline);
410	}
411skip:	if (c == '\n') {		/* get rid of rest of define */
412		SETLINE
413		if (*(sp - 1) != '\\')
414			return;
415	}
416	(void)skip_key('\n');
417}
418
419/*
420 * str_entry --
421 *	handle a struct, union or enum entry
422 */
423static int
424str_entry(int c /* current character */)
425{
426	int	curline;		/* line started on */
427	char	*sp;			/* buffer pointer */
428	char	tok[LINE_MAX];		/* storage buffer */
429
430	curline = lineno;
431	while (iswhite(c))
432		if (GETC(==, EOF))
433			return (NO);
434	if (c == '{')		/* it was "struct {" */
435		return (YES);
436	for (sp = tok;;) {		/* get next token */
437		*sp++ = c;
438		if (GETC(==, EOF))
439			return (NO);
440		if (!intoken(c))
441			break;
442	}
443	switch (c) {
444		case '{':		/* it was "struct foo{" */
445			--sp;
446			break;
447		case '\n':		/* it was "struct foo\n" */
448			SETLINE;
449			/*FALLTHROUGH*/
450		default:		/* probably "struct foo " */
451			while (GETC(!=, EOF))
452				if (!iswhite(c))
453					break;
454			if (c != '{') {
455				(void)ungetc(c, inf);
456				return (NO);
457			}
458	}
459	*sp = EOS;
460	pfnote(tok, curline);
461	return (YES);
462}
463
464/*
465 * skip_comment --
466 *	skip over comment
467 */
468void
469skip_comment(int commenttype)
470{
471	int	c;			/* character read */
472	int	star;			/* '*' flag */
473
474	for (star = 0; GETC(!=, EOF);)
475		switch(c) {
476		/* comments don't nest, nor can they be escaped. */
477		case '*':
478			star = YES;
479			break;
480		case '/':
481			if (commenttype == '*' && star)
482				return;
483			break;
484		case '\n':
485			if (commenttype == '/') {
486				/*
487				 * we don't really parse C, so sometimes it
488				 * is necessary to see the newline
489				 */
490				ungetc(c, inf);
491				return;
492			}
493			SETLINE;
494			/*FALLTHROUGH*/
495		default:
496			star = NO;
497			break;
498		}
499}
500
501/*
502 * skip_string --
503 *	skip to the end of a string or character constant.
504 */
505void
506skip_string(int key)
507{
508	int	c,
509		skip;
510
511	for (skip = NO; GETC(!=, EOF); )
512		switch (c) {
513		case '\\':		/* a backslash escapes anything */
514			skip = !skip;	/* we toggle in case it's "\\" */
515			break;
516		case '\n':
517			SETLINE;
518			/*FALLTHROUGH*/
519		default:
520			if (c == key && !skip)
521				return;
522			skip = NO;
523		}
524}
525
526/*
527 * skip_key --
528 *	skip to next char "key"
529 */
530int
531skip_key(int key)
532{
533	int	c,
534		skip,
535		retval;
536
537	for (skip = retval = NO; GETC(!=, EOF);)
538		switch(c) {
539		case '\\':		/* a backslash escapes anything */
540			skip = !skip;	/* we toggle in case it's "\\" */
541			break;
542		case ';':		/* special case for yacc; if one */
543		case '|':		/* of these chars occurs, we may */
544			retval = YES;	/* have moved out of the rule */
545			break;		/* not used by C */
546		case '\'':
547		case '"':
548			/* skip strings and character constants */
549			skip_string(c);
550			break;
551		case '/':
552			/* skip comments */
553			if (GETC(==, '*')) {
554				skip_comment(c);
555				break;
556			} else if (c == '/') {
557				skip_comment(c);
558				break;
559			}
560			(void)ungetc(c, inf);
561			c = '/';
562			goto norm;
563		case '\n':
564			SETLINE;
565			/*FALLTHROUGH*/
566		default:
567		norm:
568			if (c == key && !skip)
569				return (retval);
570			skip = NO;
571		}
572	return (retval);
573}
574