1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1987, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#if 0
33#ifndef lint
34static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
35#endif
36#endif
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD$");
40
41#include <limits.h>
42#include <stdio.h>
43#include <string.h>
44
45#include "ctags.h"
46
/* file-local helpers; the definitions follow c_entries() below */
static int	func_entry(void);
static void	hash_entry(void);
static void	skip_string(int key);
static int	str_entry(int c);
51
52/*
53 * c_entries --
54 *	read .c and .h files and call appropriate routines
55 */
56void
57c_entries(void)
58{
59	int	c;			/* current character */
60	int	level;			/* brace level */
61	int	token;			/* if reading a token */
62	int	t_def;			/* if reading a typedef */
63	int	t_level;		/* typedef's brace level */
64	char	*sp;			/* buffer pointer */
65	char	tok[MAXTOKEN];		/* token buffer */
66
67	lineftell = ftell(inf);
68	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
69	while (GETC(!=, EOF)) {
70		switch (c) {
71		/*
72		 * Here's where it DOESN'T handle: {
73		 *	foo(a)
74		 *	{
75		 *	#ifdef notdef
76		 *		}
77		 *	#endif
78		 *		if (a)
79		 *			puts("hello, world");
80		 *	}
81		 */
82		case '{':
83			++level;
84			goto endtok;
85		case '}':
86			/*
87			 * if level goes below zero, try and fix
88			 * it, even though we've already messed up
89			 */
90			if (--level < 0)
91				level = 0;
92			goto endtok;
93
94		case '\n':
95			SETLINE;
96			/*
97			 * the above 3 cases are similar in that they
98			 * are special characters that also end tokens.
99			 */
100	endtok:			if (sp > tok) {
101				*sp = EOS;
102				token = YES;
103				sp = tok;
104			}
105			else
106				token = NO;
107			continue;
108
109		/*
110		 * We ignore quoted strings and character constants
111		 * completely.
112		 */
113		case '"':
114		case '\'':
115			skip_string(c);
116			break;
117
118		/*
119		 * comments can be fun; note the state is unchanged after
120		 * return, in case we found:
121		 *	"foo() XX comment XX { int bar; }"
122		 */
123		case '/':
124			if (GETC(==, '*') || c == '/') {
125				skip_comment(c);
126				continue;
127			}
128			(void)ungetc(c, inf);
129			c = '/';
130			goto storec;
131
132		/* hash marks flag #define's. */
133		case '#':
134			if (sp == tok) {
135				hash_entry();
136				break;
137			}
138			goto storec;
139
140		/*
141		 * if we have a current token, parenthesis on
142		 * level zero indicates a function.
143		 */
144		case '(':
145			if (!level && token) {
146				int	curline;
147
148				if (sp != tok)
149					*sp = EOS;
150				/*
151				 * grab the line immediately, we may
152				 * already be wrong, for example,
153				 *	foo\n
154				 *	(arg1,
155				 */
156				get_line();
157				curline = lineno;
158				if (func_entry()) {
159					++level;
160					pfnote(tok, curline);
161				}
162				break;
163			}
164			goto storec;
165
166		/*
167		 * semi-colons indicate the end of a typedef; if we find a
168		 * typedef we search for the next semi-colon of the same
169		 * level as the typedef.  Ignoring "structs", they are
170		 * tricky, since you can find:
171		 *
172		 *	"typedef long time_t;"
173		 *	"typedef unsigned int u_int;"
174		 *	"typedef unsigned int u_int [10];"
175		 *
176		 * If looking at a typedef, we save a copy of the last token
177		 * found.  Then, when we find the ';' we take the current
178		 * token if it starts with a valid token name, else we take
179		 * the one we saved.  There's probably some reasonable
180		 * alternative to this...
181		 */
182		case ';':
183			if (t_def && level == t_level) {
184				t_def = NO;
185				get_line();
186				if (sp != tok)
187					*sp = EOS;
188				pfnote(tok, lineno);
189				break;
190			}
191			goto storec;
192
193		/*
194		 * store characters until one that can't be part of a token
195		 * comes along; check the current token against certain
196		 * reserved words.
197		 */
198		default:
199			/* ignore whitespace */
200			if (c == ' ' || c == '\t') {
201				int save = c;
202				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
203					;
204				if (c == EOF)
205					return;
206				(void)ungetc(c, inf);
207				c = save;
208			}
209	storec:		if (!intoken(c)) {
210				if (sp == tok)
211					break;
212				*sp = EOS;
213				if (tflag) {
214					/* no typedefs inside typedefs */
215					if (!t_def &&
216						   !memcmp(tok, "typedef",8)) {
217						t_def = YES;
218						t_level = level;
219						break;
220					}
221					/* catch "typedef struct" */
222					if ((!t_def || t_level < level)
223					    && (!memcmp(tok, "struct", 7)
224					    || !memcmp(tok, "union", 6)
225					    || !memcmp(tok, "enum", 5))) {
226						/*
227						 * get line immediately;
228						 * may change before '{'
229						 */
230						get_line();
231						if (str_entry(c))
232							++level;
233						break;
234						/* } */
235					}
236				}
237				sp = tok;
238			}
239			else if (sp != tok || begtoken(c)) {
240				if (sp == tok + sizeof tok - 1)
241					/* Too long -- truncate it */
242					*sp = EOS;
243				else
244					*sp++ = c;
245				token = YES;
246			}
247			continue;
248		}
249
250		sp = tok;
251		token = NO;
252	}
253}
254
255/*
256 * func_entry --
257 *	handle a function reference
258 */
259static int
260func_entry(void)
261{
262	int	c;			/* current character */
263	int	level = 0;		/* for matching '()' */
264
265	/*
266	 * Find the end of the assumed function declaration.
267	 * Note that ANSI C functions can have type definitions so keep
268	 * track of the parentheses nesting level.
269	 */
270	while (GETC(!=, EOF)) {
271		switch (c) {
272		case '\'':
273		case '"':
274			/* skip strings and character constants */
275			skip_string(c);
276			break;
277		case '/':
278			/* skip comments */
279			if (GETC(==, '*') || c == '/')
280				skip_comment(c);
281			break;
282		case '(':
283			level++;
284			break;
285		case ')':
286			if (level == 0)
287				goto fnd;
288			level--;
289			break;
290		case '\n':
291			SETLINE;
292		}
293	}
294	return (NO);
295fnd:
296	/*
297	 * we assume that the character after a function's right paren
298	 * is a token character if it's a function and a non-token
299	 * character if it's a declaration.  Comments don't count...
300	 */
301	for (;;) {
302		while (GETC(!=, EOF) && iswhite(c))
303			if (c == '\n')
304				SETLINE;
305		if (intoken(c) || c == '{')
306			break;
307		if (c == '/' && (GETC(==, '*') || c == '/'))
308			skip_comment(c);
309		else {				/* don't ever "read" '/' */
310			(void)ungetc(c, inf);
311			return (NO);
312		}
313	}
314	if (c != '{')
315		(void)skip_key('{');
316	return (YES);
317}
318
319/*
320 * hash_entry --
321 *	handle a line starting with a '#'
322 */
323static void
324hash_entry(void)
325{
326	int	c;			/* character read */
327	int	curline;		/* line started on */
328	char	*sp;			/* buffer pointer */
329	char	tok[MAXTOKEN];		/* storage buffer */
330
331	/* ignore leading whitespace */
332	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
333		;
334	(void)ungetc(c, inf);
335
336	curline = lineno;
337	for (sp = tok;;) {		/* get next token */
338		if (GETC(==, EOF))
339			return;
340		if (iswhite(c))
341			break;
342		if (sp == tok + sizeof tok - 1)
343			/* Too long -- truncate it */
344			*sp = EOS;
345		else
346			*sp++ = c;
347	}
348	*sp = EOS;
349	if (memcmp(tok, "define", 6))	/* only interested in #define's */
350		goto skip;
351	for (;;) {			/* this doesn't handle "#define \n" */
352		if (GETC(==, EOF))
353			return;
354		if (!iswhite(c))
355			break;
356	}
357	for (sp = tok;;) {		/* get next token */
358		if (sp == tok + sizeof tok - 1)
359			/* Too long -- truncate it */
360			*sp = EOS;
361		else
362			*sp++ = c;
363		if (GETC(==, EOF))
364			return;
365		/*
366		 * this is where it DOESN'T handle
367		 * "#define \n"
368		 */
369		if (!intoken(c))
370			break;
371	}
372	*sp = EOS;
373	if (dflag || c == '(') {	/* only want macros */
374		get_line();
375		pfnote(tok, curline);
376	}
377skip:	if (c == '\n') {		/* get rid of rest of define */
378		SETLINE
379		if (*(sp - 1) != '\\')
380			return;
381	}
382	(void)skip_key('\n');
383}
384
385/*
386 * str_entry --
387 *	handle a struct, union or enum entry
388 */
389static int
390str_entry(int c) /* c is current character */
391{
392	int	curline;		/* line started on */
393	char	*sp;			/* buffer pointer */
394	char	tok[LINE_MAX];		/* storage buffer */
395
396	curline = lineno;
397	while (iswhite(c))
398		if (GETC(==, EOF))
399			return (NO);
400	if (c == '{')		/* it was "struct {" */
401		return (YES);
402	for (sp = tok;;) {		/* get next token */
403		if (sp == tok + sizeof tok - 1)
404			/* Too long -- truncate it */
405			*sp = EOS;
406		else
407			*sp++ = c;
408		if (GETC(==, EOF))
409			return (NO);
410		if (!intoken(c))
411			break;
412	}
413	switch (c) {
414		case '{':		/* it was "struct foo{" */
415			--sp;
416			break;
417		case '\n':		/* it was "struct foo\n" */
418			SETLINE;
419			/*FALLTHROUGH*/
420		default:		/* probably "struct foo " */
421			while (GETC(!=, EOF))
422				if (!iswhite(c))
423					break;
424			if (c != '{') {
425				(void)ungetc(c, inf);
426				return (NO);
427			}
428	}
429	*sp = EOS;
430	pfnote(tok, curline);
431	return (YES);
432}
433
434/*
435 * skip_comment --
436 *	skip over comment
437 */
438void
439skip_comment(int t) /* t is comment character */
440{
441	int	c;			/* character read */
442	int	star;			/* '*' flag */
443
444	for (star = 0; GETC(!=, EOF);)
445		switch(c) {
446		/* comments don't nest, nor can they be escaped. */
447		case '*':
448			star = YES;
449			break;
450		case '/':
451			if (star && t == '*')
452				return;
453			break;
454		case '\n':
455			if (t == '/')
456				return;
457			SETLINE;
458			/*FALLTHROUGH*/
459		default:
460			star = NO;
461			break;
462		}
463}
464
465/*
466 * skip_string --
467 *	skip to the end of a string or character constant.
468 */
469void
470skip_string(int key)
471{
472	int	c,
473		skip;
474
475	for (skip = NO; GETC(!=, EOF); )
476		switch (c) {
477		case '\\':		/* a backslash escapes anything */
478			skip = !skip;	/* we toggle in case it's "\\" */
479			break;
480		case '\n':
481			SETLINE;
482			/*FALLTHROUGH*/
483		default:
484			if (c == key && !skip)
485				return;
486			skip = NO;
487		}
488}
489
490/*
491 * skip_key --
492 *	skip to next char "key"
493 */
494int
495skip_key(int key)
496{
497	int	c,
498		skip,
499		retval;
500
501	for (skip = retval = NO; GETC(!=, EOF);)
502		switch(c) {
503		case '\\':		/* a backslash escapes anything */
504			skip = !skip;	/* we toggle in case it's "\\" */
505			break;
506		case ';':		/* special case for yacc; if one */
507		case '|':		/* of these chars occurs, we may */
508			retval = YES;	/* have moved out of the rule */
509			break;		/* not used by C */
510		case '\'':
511		case '"':
512			/* skip strings and character constants */
513			skip_string(c);
514			break;
515		case '/':
516			/* skip comments */
517			if (GETC(==, '*') || c == '/') {
518				skip_comment(c);
519				break;
520			}
521			(void)ungetc(c, inf);
522			c = '/';
523			goto norm;
524		case '\n':
525			SETLINE;
526			/*FALLTHROUGH*/
527		default:
528		norm:
529			if (c == key && !skip)
530				return (retval);
531			skip = NO;
532		}
533	return (retval);
534}
535