emit1.c revision 281168
1269162Sbapt/* $NetBSD: emit1.c,v 1.14 2004/06/20 22:20:16 jmc Exp $ */
2269162Sbapt
3269162Sbapt/*
4269162Sbapt * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
5269162Sbapt * Copyright (c) 1994, 1995 Jochen Pohl
6269162Sbapt * All Rights Reserved.
7269162Sbapt *
8269162Sbapt * Redistribution and use in source and binary forms, with or without
9269162Sbapt * modification, are permitted provided that the following conditions
10269162Sbapt * are met:
11269162Sbapt * 1. Redistributions of source code must retain the above copyright
12269162Sbapt *    notice, this list of conditions and the following disclaimer.
13269162Sbapt * 2. Redistributions in binary form must reproduce the above copyright
14269162Sbapt *    notice, this list of conditions and the following disclaimer in the
15269162Sbapt *    documentation and/or other materials provided with the distribution.
16269162Sbapt * 3. All advertising materials mentioning features or use of this software
17269162Sbapt *    must display the following acknowledgement:
18269162Sbapt *      This product includes software developed by Jochen Pohl for
19269162Sbapt *	The NetBSD Project.
20269162Sbapt * 4. The name of the author may not be used to endorse or promote products
21269162Sbapt *    derived from this software without specific prior written permission.
22269162Sbapt *
23269162Sbapt * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24269162Sbapt * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25269162Sbapt * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26269162Sbapt * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27269162Sbapt * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28269162Sbapt * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29269162Sbapt * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30269162Sbapt * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31269162Sbapt * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32269162Sbapt * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33269162Sbapt */
34269162Sbapt
35269162Sbapt#include <sys/cdefs.h>
36269162Sbapt#if defined(__RCSID) && !defined(lint)
37269162Sbapt__RCSID("$NetBSD: emit1.c,v 1.14 2004/06/20 22:20:16 jmc Exp $");
38269162Sbapt#endif
39269162Sbapt__FBSDID("$FreeBSD: stable/10/usr.bin/xlint/lint1/emit1.c 281168 2015-04-06 19:56:27Z pfg $");
40269162Sbapt
41269162Sbapt#include <ctype.h>
42269162Sbapt
43269162Sbapt#include "lint1.h"
44269162Sbapt
45269162Sbaptstatic	void	outtt(sym_t *, sym_t *);
46269162Sbaptstatic	void	outfstrg(strg_t *);
47269162Sbapt
48269162Sbapt/*
49269162Sbapt * Write type into the output buffer.
50269162Sbapt * The type is written as a sequence of substrings, each of which describes a
51269162Sbapt * node of type type_t
52269162Sbapt * a node is coded as follows:
53269162Sbapt *	char			C
54269162Sbapt *	signed char		s C
55269162Sbapt *	unsigned char		u C
56269162Sbapt *	short			S
57269162Sbapt *	unsigned short		u S
58269162Sbapt *	int			I
59269162Sbapt *	unsigned int		u I
60269162Sbapt *	long			L
61269162Sbapt *	unsigned long		u L
62269162Sbapt *	long long		Q
63269162Sbapt *	unsigned long long	u Q
64269162Sbapt *	float			s D
65269162Sbapt *	double			D
66269162Sbapt *	long double		l D
67269162Sbapt *	void			V
68269162Sbapt *	*			P
69269162Sbapt *	[n]			A n
70269162Sbapt *	()			F
71269162Sbapt *	(void)			F 0
72269162Sbapt *	(n arguments)		F n arg1 arg2 ... argn
73269162Sbapt *	(n arguments, ...)	F n arg1 arg2 ... argn-1 E
74269162Sbapt *	(a, b, c, ...)		f n arg1 arg2 ...
75269162Sbapt *	enum tag		e T tag_or_typename
76269162Sbapt *	struct tag		s T tag_or_typename
77269162Sbapt *	union tag		u T tag_or_typename
78269162Sbapt *
79269162Sbapt *	tag_or_typename		0			no tag or type name
80269162Sbapt *				1 n tag			Tag
81269162Sbapt *				2 n typename		only type name
82269162Sbapt *
83269162Sbapt * spaces are only for better readability
84269162Sbapt * additionally it is possible to prepend the characters 'c' (for const)
85269162Sbapt * and 'v' (for volatile)
86269162Sbapt */
87269162Sbaptvoid
88269162Sbaptouttype(type_t *tp)
89269162Sbapt{
90269162Sbapt	int	t, s, na;
91269162Sbapt	sym_t	*arg;
92269162Sbapt	tspec_t	ts;
93269162Sbapt
94269162Sbapt	while (tp != NULL) {
95269162Sbapt		if ((ts = tp->t_tspec) == INT && tp->t_isenum)
96269162Sbapt			ts = ENUM;
97269162Sbapt		switch (ts) {
98269162Sbapt		case CHAR:	t = 'C';	s = '\0';	break;
99269162Sbapt		case SCHAR:	t = 'C';	s = 's';	break;
100269162Sbapt		case UCHAR:	t = 'C';	s = 'u';	break;
101269162Sbapt		case SHORT:	t = 'S';	s = '\0';	break;
102269162Sbapt		case USHORT:	t = 'S';	s = 'u';	break;
103269162Sbapt		case INT:	t = 'I';	s = '\0';	break;
104269162Sbapt		case UINT:	t = 'I';	s = 'u';	break;
105269162Sbapt		case LONG:	t = 'L';	s = '\0';	break;
106269162Sbapt		case ULONG:	t = 'L';	s = 'u';	break;
107269162Sbapt		case QUAD:	t = 'Q';	s = '\0';	break;
108269162Sbapt		case UQUAD:	t = 'Q';	s = 'u';	break;
109269162Sbapt		case FLOAT:	t = 'D';	s = 's';	break;
110269162Sbapt		case DOUBLE:	t = 'D';	s = '\0';	break;
111269162Sbapt		case LDOUBLE:	t = 'D';	s = 'l';	break;
112269162Sbapt		case VOID:	t = 'V';	s = '\0';	break;
113269162Sbapt		case PTR:	t = 'P';	s = '\0';	break;
114269162Sbapt		case ARRAY:	t = 'A';	s = '\0';	break;
115269162Sbapt		case FUNC:	t = 'F';	s = '\0';	break;
116269162Sbapt		case ENUM:	t = 'T';	s = 'e';	break;
117269162Sbapt		case STRUCT:	t = 'T';	s = 's';	break;
118269162Sbapt		case UNION:	t = 'T';	s = 'u';	break;
119269162Sbapt		default:
120269162Sbapt			LERROR("outtyp()");
121269162Sbapt		}
122269162Sbapt		if (tp->t_const)
123269162Sbapt			outchar('c');
124269162Sbapt		if (tp->t_volatile)
125269162Sbapt			outchar('v');
126269162Sbapt		if (s != '\0')
127269162Sbapt			outchar(s);
128269162Sbapt		outchar(t);
129269162Sbapt		if (ts == ARRAY) {
130269162Sbapt			outint(tp->t_dim);
131269162Sbapt		} else if (ts == ENUM) {
132269162Sbapt			outtt(tp->t_enum->etag, tp->t_enum->etdef);
133269162Sbapt		} else if (ts == STRUCT || ts == UNION) {
134269162Sbapt			outtt(tp->t_str->stag, tp->t_str->stdef);
135269162Sbapt		} else if (ts == FUNC && tp->t_proto) {
136269162Sbapt			na = 0;
137269162Sbapt			for (arg = tp->t_args; arg != NULL; arg = arg->s_nxt)
138269162Sbapt					na++;
139269162Sbapt			if (tp->t_vararg)
140269162Sbapt				na++;
141269162Sbapt			outint(na);
142269162Sbapt			for (arg = tp->t_args; arg != NULL; arg = arg->s_nxt)
143269162Sbapt				outtype(arg->s_type);
144269162Sbapt			if (tp->t_vararg)
145269162Sbapt				outchar('E');
146269162Sbapt		}
147269162Sbapt		tp = tp->t_subt;
148269162Sbapt	}
149269162Sbapt}
150269162Sbapt
151269162Sbapt/*
152269162Sbapt * type to string
153269162Sbapt * used for debugging output
154269162Sbapt *
155269162Sbapt * it uses its own output buffer for conversion
156269162Sbapt */
157269162Sbaptconst char *
158269162Sbaptttos(type_t *tp)
159269162Sbapt{
160269162Sbapt	static	ob_t	tob;
161269162Sbapt	ob_t	tmp;
162269162Sbapt
163269162Sbapt	if (tob.o_buf == NULL) {
164269162Sbapt		tob.o_len = 64;
165269162Sbapt		tob.o_buf = tob.o_nxt = xmalloc(tob.o_len);
166269162Sbapt		tob.o_end = tob.o_buf + tob.o_len;
167269162Sbapt	}
168269162Sbapt
169269162Sbapt	tmp = ob;
170269162Sbapt	ob = tob;
171269162Sbapt	ob.o_nxt = ob.o_buf;
172269162Sbapt	outtype(tp);
173269162Sbapt	outchar('\0');
174269162Sbapt	tob = ob;
175269162Sbapt	ob = tmp;
176269162Sbapt
177269162Sbapt	return (tob.o_buf);
178269162Sbapt}
179269162Sbapt
180269162Sbapt/*
181269162Sbapt * write the name of a tag or typename
182269162Sbapt *
183269162Sbapt * if the tag is named, the name of the
184269162Sbapt * tag is written, otherwise, if a typename exists which
185269162Sbapt * refers to this tag, this typename is written
186269162Sbapt */
187269162Sbaptstatic void
188269162Sbaptouttt(sym_t *tag, sym_t *tdef)
189269162Sbapt{
190269162Sbapt
191269162Sbapt	/*
192269162Sbapt	 * 0 is no longer used.
193269162Sbapt	 */
194269162Sbapt	if (tag->s_name != unnamed) {
195269162Sbapt		outint(1);
196269162Sbapt		outname(tag->s_name);
197269162Sbapt	} else if (tdef != NULL) {
198269162Sbapt		outint(2);
199269162Sbapt		outname(tdef->s_name);
200269162Sbapt	} else {
201269162Sbapt		outint(3);
202269162Sbapt		outint(tag->s_dpos.p_line);
203269162Sbapt		outchar('.');
204269162Sbapt		outint(getfnid(tag->s_dpos.p_file));
205269162Sbapt		outchar('.');
206269162Sbapt		outint(tag->s_dpos.p_uniq);
207269162Sbapt	}
208269162Sbapt}
209269162Sbapt
210269162Sbapt/*
211269162Sbapt * write information about a global declared/defined symbol
212269162Sbapt * with storage class extern
213269162Sbapt *
214269162Sbapt * informations about function definitions are written in outfdef(),
215269162Sbapt * not here
216269162Sbapt */
217269162Sbaptvoid
218269162Sbaptoutsym(sym_t *sym, scl_t sc, def_t def)
219269162Sbapt{
220269162Sbapt
221269162Sbapt	/*
222269162Sbapt	 * Static function declarations must also be written to the output
223269162Sbapt	 * file. Compatibility of function declarations (for both static
224269162Sbapt	 * and extern functions) must be checked in lint2. Lint1 can't do
225269162Sbapt	 * this, especially not, if functions are declared at block level
226269162Sbapt	 * before their first declaration at level 0.
227269162Sbapt	 */
228269162Sbapt	if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
229269162Sbapt		return;
230269162Sbapt
231269162Sbapt	/* reset buffer */
232269162Sbapt	outclr();
233269162Sbapt
234269162Sbapt	/*
235269162Sbapt	 * line number of .c source, 'd' for declaration, Id of current
236269162Sbapt	 * source (.c or .h), and line in current source.
237269162Sbapt	 */
238269162Sbapt	outint(csrc_pos.p_line);
239269162Sbapt	outchar('d');
240269162Sbapt	outint(getfnid(sym->s_dpos.p_file));
241269162Sbapt	outchar('.');
242269162Sbapt	outint(sym->s_dpos.p_line);
243269162Sbapt
244269162Sbapt	/* flags */
245269162Sbapt
246269162Sbapt	switch (def) {
247269162Sbapt	case DEF:
248269162Sbapt		/* defined */
249269162Sbapt		outchar('d');
250269162Sbapt		break;
251269162Sbapt	case TDEF:
252269162Sbapt		/* tentative defined */
253269162Sbapt		outchar('t');
254269162Sbapt		break;
255269162Sbapt	case DECL:
256269162Sbapt		/* declared */
257269162Sbapt		outchar('e');
258269162Sbapt		break;
259269162Sbapt	default:
260269162Sbapt		LERROR("outsym()");
261269162Sbapt	}
262269162Sbapt	if (llibflg && def != DECL) {
263269162Sbapt		/*
264269162Sbapt		 * mark it as used so we get no warnings from lint2 about
265269162Sbapt		 * unused symbols in libraries.
266269162Sbapt		 */
267269162Sbapt		outchar('u');
268269162Sbapt	}
269269162Sbapt
270269162Sbapt	if (sc == STATIC)
271269162Sbapt		outchar('s');
272269162Sbapt
273269162Sbapt	/* name of the symbol */
274269162Sbapt	outname(sym->s_name);
275269162Sbapt
276269162Sbapt	/* renamed name of symbol, if necessary */
277269162Sbapt	if (sym->s_rename) {
278269162Sbapt		outchar('r');
279269162Sbapt		outname(sym->s_rename);
280269162Sbapt	}
281269162Sbapt
282269162Sbapt	/* type of the symbol */
283269162Sbapt	outtype(sym->s_type);
284269162Sbapt}
285269162Sbapt
286269162Sbapt/*
287269162Sbapt * write information about function definition
288269162Sbapt *
289269162Sbapt * this is also done for static functions so we are able to check if
290269162Sbapt * they are called with proper argument types
291269162Sbapt */
292269162Sbaptvoid
293269162Sbaptoutfdef(sym_t *fsym, pos_t *posp, int rval, int osdef, sym_t *args)
294269162Sbapt{
295269162Sbapt	int	narg;
296269162Sbapt	sym_t	*arg;
297269162Sbapt
298269162Sbapt	/* reset the buffer */
299269162Sbapt	outclr();
300269162Sbapt
301269162Sbapt	/*
302269162Sbapt	 * line number of .c source, 'd' for declaration, Id of current
303269162Sbapt	 * source (.c or .h), and line in current source
304269162Sbapt	 *
305269162Sbapt	 * we are already at the end of the function. If we are in the
306269162Sbapt	 * .c source, posp->p_line is correct, otherwise csrc_pos.p_line
307269162Sbapt	 * (for functions defined in header files).
308269162Sbapt	 */
309269162Sbapt	if (posp->p_file == csrc_pos.p_file) {
310269162Sbapt		outint(posp->p_line);
311269162Sbapt	} else {
312269162Sbapt		outint(csrc_pos.p_line);
313269162Sbapt	}
314269162Sbapt	outchar('d');
315269162Sbapt	outint(getfnid(posp->p_file));
316269162Sbapt	outchar('.');
317269162Sbapt	outint(posp->p_line);
318269162Sbapt
319269162Sbapt	/* flags */
320269162Sbapt
321269162Sbapt	/* both SCANFLIKE and PRINTFLIKE imply VARARGS */
322269162Sbapt	if (prflstrg != -1) {
323269162Sbapt		nvararg = prflstrg;
324269162Sbapt	} else if (scflstrg != -1) {
325269162Sbapt		nvararg = scflstrg;
326269162Sbapt	}
327269162Sbapt
328269162Sbapt	if (nvararg != -1) {
329269162Sbapt		outchar('v');
330269162Sbapt		outint(nvararg);
331	}
332	if (scflstrg != -1) {
333		outchar('S');
334		outint(scflstrg);
335	}
336	if (prflstrg != -1) {
337		outchar('P');
338		outint(prflstrg);
339	}
340	nvararg = prflstrg = scflstrg = -1;
341
342	outchar('d');
343
344	if (rval)
345		/* has return value */
346		outchar('r');
347
348	if (llibflg)
349		/*
350		 * mark it as used so lint2 does not complain about
351		 * unused symbols in libraries
352		 */
353		outchar('u');
354
355	if (osdef)
356		/* old style function definition */
357		outchar('o');
358
359	if (fsym->s_scl == STATIC)
360		outchar('s');
361
362	/* name of function */
363	outname(fsym->s_name);
364
365	/* renamed name of function, if necessary */
366	if (fsym->s_rename) {
367		outchar('r');
368		outname(fsym->s_rename);
369	}
370
371	/* argument types and return value */
372	if (osdef) {
373		narg = 0;
374		for (arg = args; arg != NULL; arg = arg->s_nxt)
375			narg++;
376		outchar('f');
377		outint(narg);
378		for (arg = args; arg != NULL; arg = arg->s_nxt)
379			outtype(arg->s_type);
380		outtype(fsym->s_type->t_subt);
381	} else {
382		outtype(fsym->s_type);
383	}
384}
385
386/*
387 * write out all information necessary for lint2 to check function
388 * calls
389 *
390 * rvused is set if the return value is used (asigned to a variable)
391 * rvdisc is set if the return value is not used and not ignored
392 * (casted to void)
393 */
394void
395outcall(tnode_t *tn, int rvused, int rvdisc)
396{
397	tnode_t	*args, *arg;
398	int	narg, n, i;
399	int64_t	q;
400	tspec_t	t;
401
402	/* reset buffer */
403	outclr();
404
405	/*
406	 * line number of .c source, 'c' for function call, Id of current
407	 * source (.c or .h), and line in current source
408	 */
409	outint(csrc_pos.p_line);
410	outchar('c');
411	outint(getfnid(curr_pos.p_file));
412	outchar('.');
413	outint(curr_pos.p_line);
414
415	/*
416	 * flags; 'u' and 'i' must be last to make sure a letter
417	 * is between the numeric argument of a flag and the name of
418	 * the function
419	 */
420	narg = 0;
421	args = tn->tn_right;
422	for (arg = args; arg != NULL; arg = arg->tn_right)
423		narg++;
424	/* informations about arguments */
425	for (n = 1; n <= narg; n++) {
426		/* the last argument is the top one in the tree */
427		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
428			continue;
429		arg = arg->tn_left;
430		if (arg->tn_op == CON) {
431			if (isityp(t = arg->tn_type->t_tspec)) {
432				/*
433				 * XXX it would probably be better to
434				 * explizitly test the sign
435				 */
436				if ((q = arg->tn_val->v_quad) == 0) {
437					/* zero constant */
438					outchar('z');
439				} else if (msb(q, t, 0) == 0) {
440					/* positive if casted to signed */
441					outchar('p');
442				} else {
443					/* negative if casted to signed */
444					outchar('n');
445				}
446				outint(n);
447			}
448		} else if (arg->tn_op == AMPER &&
449			   arg->tn_left->tn_op == STRING &&
450			   arg->tn_left->tn_strg->st_tspec == CHAR) {
451			/* constant string, write all format specifiers */
452			outchar('s');
453			outint(n);
454			outfstrg(arg->tn_left->tn_strg);
455		}
456
457	}
458	/* return value discarded/used/ignored */
459	outchar(rvdisc ? 'd' : (rvused ? 'u' : 'i'));
460
461	/* name of the called function */
462	outname(tn->tn_left->tn_left->tn_sym->s_name);
463
464	/* types of arguments */
465	outchar('f');
466	outint(narg);
467	for (n = 1; n <= narg; n++) {
468		/* the last argument is the top one in the tree */
469		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
470			continue;
471		outtype(arg->tn_left->tn_type);
472	}
473	/* expected type of return value */
474	outtype(tn->tn_type);
475}
476
477/*
478 * extracts potential format specifiers for printf() and scanf() and
479 * writes them, enclosed in "" and qouted if necessary, to the output buffer
480 */
481static void
482outfstrg(strg_t *strg)
483{
484	int	c, oc, first;
485	u_char	*cp;
486
487	if (strg->st_tspec != CHAR)
488		LERROR("outfstrg()");
489
490	cp = strg->st_cp;
491
492	outchar('"');
493
494	c = *cp++;
495
496	while (c != '\0') {
497
498		if (c != '%') {
499			c = *cp++;
500			continue;
501		}
502
503		outqchar('%');
504		c = *cp++;
505
506		/* flags for printf and scanf and *-fieldwidth for printf */
507		while (c != '\0' && (c == '-' || c == '+' || c == ' ' ||
508				     c == '#' || c == '0' || c == '*')) {
509			outqchar(c);
510			c = *cp++;
511		}
512
513		/* numeric field width */
514		while (c != '\0' && isdigit(c)) {
515			outqchar(c);
516			c = *cp++;
517		}
518
519		/* precision for printf */
520		if (c == '.') {
521			outqchar(c);
522			if ((c = *cp++) == '*') {
523				outqchar(c);
524				c = *cp++;
525			} else {
526				while (c != '\0' && isdigit(c)) {
527					outqchar(c);
528					c = *cp++;
529				}
530			}
531		}
532
533		/* h, l, L and q flags fpr printf and scanf */
534		if (c == 'h' || c == 'l' || c == 'L' || c == 'q') {
535			outqchar(c);
536			c = *cp++;
537		}
538
539		/*
540		 * The last character. It is always written so we can detect
541		 * invalid format specifiers.
542		 */
543		if (c != '\0') {
544			outqchar(c);
545			oc = c;
546			c = *cp++;
547			/*
548			 * handle [ for scanf. [-] means that a minus sign
549			 * was found at an undefined position.
550			 */
551			if (oc == '[') {
552				if (c == '^')
553					c = *cp++;
554				if (c == ']')
555					c = *cp++;
556				first = 1;
557				while (c != '\0' && c != ']') {
558					if (c == '-') {
559						if (!first && *cp != ']')
560							outqchar(c);
561					}
562					first = 0;
563					c = *cp++;
564				}
565				if (c == ']') {
566					outqchar(c);
567					c = *cp++;
568				}
569			}
570		}
571
572	}
573
574	outchar('"');
575}
576
577/*
578 * writes a record if sym was used
579 */
580void
581outusg(sym_t *sym)
582{
583	/* reset buffer */
584	outclr();
585
586	/*
587	 * line number of .c source, 'u' for used, Id of current
588	 * source (.c or .h), and line in current source
589	 */
590	outint(csrc_pos.p_line);
591	outchar('u');
592	outint(getfnid(curr_pos.p_file));
593	outchar('.');
594	outint(curr_pos.p_line);
595
596	/* necessary to delimit both numbers */
597	outchar('x');
598
599	/* Den Namen des Symbols ausgeben */
600	outname(sym->s_name);
601}
602