1/*
2 * Copyright (c) 1987, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#if 0
31#ifndef lint
32static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
33#endif
34#endif
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD$");
38
39#include <limits.h>
40#include <stdio.h>
41#include <string.h>
42
43#include "ctags.h"
44
45static int	func_entry(void);
46static void	hash_entry(void);
47static void	skip_string(int);
48static int	str_entry(int);
49
50/*
51 * c_entries --
52 *	read .c and .h files and call appropriate routines
53 */
54void
55c_entries(void)
56{
57	int	c;			/* current character */
58	int	level;			/* brace level */
59	int	token;			/* if reading a token */
60	int	t_def;			/* if reading a typedef */
61	int	t_level;		/* typedef's brace level */
62	char	*sp;			/* buffer pointer */
63	char	tok[MAXTOKEN];		/* token buffer */
64
65	lineftell = ftell(inf);
66	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
67	while (GETC(!=, EOF)) {
68		switch (c) {
69		/*
70		 * Here's where it DOESN'T handle: {
71		 *	foo(a)
72		 *	{
73		 *	#ifdef notdef
74		 *		}
75		 *	#endif
76		 *		if (a)
77		 *			puts("hello, world");
78		 *	}
79		 */
80		case '{':
81			++level;
82			goto endtok;
83		case '}':
84			/*
85			 * if level goes below zero, try and fix
86			 * it, even though we've already messed up
87			 */
88			if (--level < 0)
89				level = 0;
90			goto endtok;
91
92		case '\n':
93			SETLINE;
94			/*
95			 * the above 3 cases are similar in that they
96			 * are special characters that also end tokens.
97			 */
98	endtok:			if (sp > tok) {
99				*sp = EOS;
100				token = YES;
101				sp = tok;
102			}
103			else
104				token = NO;
105			continue;
106
107		/*
108		 * We ignore quoted strings and character constants
109		 * completely.
110		 */
111		case '"':
112		case '\'':
113			skip_string(c);
114			break;
115
116		/*
117		 * comments can be fun; note the state is unchanged after
118		 * return, in case we found:
119		 *	"foo() XX comment XX { int bar; }"
120		 */
121		case '/':
122			if (GETC(==, '*') || c == '/') {
123				skip_comment(c);
124				continue;
125			}
126			(void)ungetc(c, inf);
127			c = '/';
128			goto storec;
129
130		/* hash marks flag #define's. */
131		case '#':
132			if (sp == tok) {
133				hash_entry();
134				break;
135			}
136			goto storec;
137
138		/*
139		 * if we have a current token, parenthesis on
140		 * level zero indicates a function.
141		 */
142		case '(':
143			if (!level && token) {
144				int	curline;
145
146				if (sp != tok)
147					*sp = EOS;
148				/*
149				 * grab the line immediately, we may
150				 * already be wrong, for example,
151				 *	foo\n
152				 *	(arg1,
153				 */
154				getline();
155				curline = lineno;
156				if (func_entry()) {
157					++level;
158					pfnote(tok, curline);
159				}
160				break;
161			}
162			goto storec;
163
164		/*
165		 * semi-colons indicate the end of a typedef; if we find a
166		 * typedef we search for the next semi-colon of the same
167		 * level as the typedef.  Ignoring "structs", they are
168		 * tricky, since you can find:
169		 *
170		 *	"typedef long time_t;"
171		 *	"typedef unsigned int u_int;"
172		 *	"typedef unsigned int u_int [10];"
173		 *
174		 * If looking at a typedef, we save a copy of the last token
175		 * found.  Then, when we find the ';' we take the current
176		 * token if it starts with a valid token name, else we take
177		 * the one we saved.  There's probably some reasonable
178		 * alternative to this...
179		 */
180		case ';':
181			if (t_def && level == t_level) {
182				t_def = NO;
183				getline();
184				if (sp != tok)
185					*sp = EOS;
186				pfnote(tok, lineno);
187				break;
188			}
189			goto storec;
190
191		/*
192		 * store characters until one that can't be part of a token
193		 * comes along; check the current token against certain
194		 * reserved words.
195		 */
196		default:
197			/* ignore whitespace */
198			if (c == ' ' || c == '\t') {
199				int save = c;
200				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
201					;
202				if (c == EOF)
203					return;
204				(void)ungetc(c, inf);
205				c = save;
206			}
207	storec:		if (!intoken(c)) {
208				if (sp == tok)
209					break;
210				*sp = EOS;
211				if (tflag) {
212					/* no typedefs inside typedefs */
213					if (!t_def &&
214						   !memcmp(tok, "typedef",8)) {
215						t_def = YES;
216						t_level = level;
217						break;
218					}
219					/* catch "typedef struct" */
220					if ((!t_def || t_level < level)
221					    && (!memcmp(tok, "struct", 7)
222					    || !memcmp(tok, "union", 6)
223					    || !memcmp(tok, "enum", 5))) {
224						/*
225						 * get line immediately;
226						 * may change before '{'
227						 */
228						getline();
229						if (str_entry(c))
230							++level;
231						break;
232						/* } */
233					}
234				}
235				sp = tok;
236			}
237			else if (sp != tok || begtoken(c)) {
238				if (sp == tok + sizeof tok - 1)
239					/* Too long -- truncate it */
240					*sp = EOS;
241				else
242					*sp++ = c;
243				token = YES;
244			}
245			continue;
246		}
247
248		sp = tok;
249		token = NO;
250	}
251}
252
253/*
254 * func_entry --
255 *	handle a function reference
256 */
257static int
258func_entry(void)
259{
260	int	c;			/* current character */
261	int	level = 0;		/* for matching '()' */
262
263	/*
264	 * Find the end of the assumed function declaration.
265	 * Note that ANSI C functions can have type definitions so keep
266	 * track of the parentheses nesting level.
267	 */
268	while (GETC(!=, EOF)) {
269		switch (c) {
270		case '\'':
271		case '"':
272			/* skip strings and character constants */
273			skip_string(c);
274			break;
275		case '/':
276			/* skip comments */
277			if (GETC(==, '*') || c == '/')
278				skip_comment(c);
279			break;
280		case '(':
281			level++;
282			break;
283		case ')':
284			if (level == 0)
285				goto fnd;
286			level--;
287			break;
288		case '\n':
289			SETLINE;
290		}
291	}
292	return (NO);
293fnd:
294	/*
295	 * we assume that the character after a function's right paren
296	 * is a token character if it's a function and a non-token
297	 * character if it's a declaration.  Comments don't count...
298	 */
299	for (;;) {
300		while (GETC(!=, EOF) && iswhite(c))
301			if (c == '\n')
302				SETLINE;
303		if (intoken(c) || c == '{')
304			break;
305		if (c == '/' && (GETC(==, '*') || c == '/'))
306			skip_comment(c);
307		else {				/* don't ever "read" '/' */
308			(void)ungetc(c, inf);
309			return (NO);
310		}
311	}
312	if (c != '{')
313		(void)skip_key('{');
314	return (YES);
315}
316
317/*
318 * hash_entry --
319 *	handle a line starting with a '#'
320 */
321static void
322hash_entry(void)
323{
324	int	c;			/* character read */
325	int	curline;		/* line started on */
326	char	*sp;			/* buffer pointer */
327	char	tok[MAXTOKEN];		/* storage buffer */
328
329	/* ignore leading whitespace */
330	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
331		;
332	(void)ungetc(c, inf);
333
334	curline = lineno;
335	for (sp = tok;;) {		/* get next token */
336		if (GETC(==, EOF))
337			return;
338		if (iswhite(c))
339			break;
340		if (sp == tok + sizeof tok - 1)
341			/* Too long -- truncate it */
342			*sp = EOS;
343		else
344			*sp++ = c;
345	}
346	*sp = EOS;
347	if (memcmp(tok, "define", 6))	/* only interested in #define's */
348		goto skip;
349	for (;;) {			/* this doesn't handle "#define \n" */
350		if (GETC(==, EOF))
351			return;
352		if (!iswhite(c))
353			break;
354	}
355	for (sp = tok;;) {		/* get next token */
356		if (sp == tok + sizeof tok - 1)
357			/* Too long -- truncate it */
358			*sp = EOS;
359		else
360			*sp++ = c;
361		if (GETC(==, EOF))
362			return;
363		/*
364		 * this is where it DOESN'T handle
365		 * "#define \n"
366		 */
367		if (!intoken(c))
368			break;
369	}
370	*sp = EOS;
371	if (dflag || c == '(') {	/* only want macros */
372		getline();
373		pfnote(tok, curline);
374	}
375skip:	if (c == '\n') {		/* get rid of rest of define */
376		SETLINE
377		if (*(sp - 1) != '\\')
378			return;
379	}
380	(void)skip_key('\n');
381}
382
383/*
384 * str_entry --
385 *	handle a struct, union or enum entry
386 */
387static int
388str_entry(int c) /* c is current character */
389{
390	int	curline;		/* line started on */
391	char	*sp;			/* buffer pointer */
392	char	tok[LINE_MAX];		/* storage buffer */
393
394	curline = lineno;
395	while (iswhite(c))
396		if (GETC(==, EOF))
397			return (NO);
398	if (c == '{')		/* it was "struct {" */
399		return (YES);
400	for (sp = tok;;) {		/* get next token */
401		if (sp == tok + sizeof tok - 1)
402			/* Too long -- truncate it */
403			*sp = EOS;
404		else
405			*sp++ = c;
406		if (GETC(==, EOF))
407			return (NO);
408		if (!intoken(c))
409			break;
410	}
411	switch (c) {
412		case '{':		/* it was "struct foo{" */
413			--sp;
414			break;
415		case '\n':		/* it was "struct foo\n" */
416			SETLINE;
417			/*FALLTHROUGH*/
418		default:		/* probably "struct foo " */
419			while (GETC(!=, EOF))
420				if (!iswhite(c))
421					break;
422			if (c != '{') {
423				(void)ungetc(c, inf);
424				return (NO);
425			}
426	}
427	*sp = EOS;
428	pfnote(tok, curline);
429	return (YES);
430}
431
432/*
433 * skip_comment --
434 *	skip over comment
435 */
436void
437skip_comment(int t) /* t is comment character */
438{
439	int	c;			/* character read */
440	int	star;			/* '*' flag */
441
442	for (star = 0; GETC(!=, EOF);)
443		switch(c) {
444		/* comments don't nest, nor can they be escaped. */
445		case '*':
446			star = YES;
447			break;
448		case '/':
449			if (star && t == '*')
450				return;
451			break;
452		case '\n':
453			if (t == '/')
454				return;
455			SETLINE;
456			/*FALLTHROUGH*/
457		default:
458			star = NO;
459			break;
460		}
461}
462
463/*
464 * skip_string --
465 *	skip to the end of a string or character constant.
466 */
467void
468skip_string(int key)
469{
470	int	c,
471		skip;
472
473	for (skip = NO; GETC(!=, EOF); )
474		switch (c) {
475		case '\\':		/* a backslash escapes anything */
476			skip = !skip;	/* we toggle in case it's "\\" */
477			break;
478		case '\n':
479			SETLINE;
480			/*FALLTHROUGH*/
481		default:
482			if (c == key && !skip)
483				return;
484			skip = NO;
485		}
486}
487
488/*
489 * skip_key --
490 *	skip to next char "key"
491 */
492int
493skip_key(int key)
494{
495	int	c,
496		skip,
497		retval;
498
499	for (skip = retval = NO; GETC(!=, EOF);)
500		switch(c) {
501		case '\\':		/* a backslash escapes anything */
502			skip = !skip;	/* we toggle in case it's "\\" */
503			break;
504		case ';':		/* special case for yacc; if one */
505		case '|':		/* of these chars occurs, we may */
506			retval = YES;	/* have moved out of the rule */
507			break;		/* not used by C */
508		case '\'':
509		case '"':
510			/* skip strings and character constants */
511			skip_string(c);
512			break;
513		case '/':
514			/* skip comments */
515			if (GETC(==, '*') || c == '/') {
516				skip_comment(c);
517				break;
518			}
519			(void)ungetc(c, inf);
520			c = '/';
521			goto norm;
522		case '\n':
523			SETLINE;
524			/*FALLTHROUGH*/
525		default:
526		norm:
527			if (c == key && !skip)
528				return (retval);
529			skip = NO;
530		}
531	return (retval);
532}
533