emit1.c revision 91587
1/* $NetBSD: emit1.c,v 1.11 2002/01/31 19:36:54 tv Exp $ */
2
3/*
4 * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
5 * Copyright (c) 1994, 1995 Jochen Pohl
6 * All Rights Reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *      This product includes software developed by Jochen Pohl for
19 *	The NetBSD Project.
20 * 4. The name of the author may not be used to endorse or promote products
21 *    derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35#include <sys/cdefs.h>
36#if defined(__RCSID) && !defined(lint)
37__RCSID("$NetBSD: emit1.c,v 1.11 2002/01/31 19:36:54 tv Exp $");
38#endif
39
40#include <ctype.h>
41
42#include "lint1.h"
43
44static	void	outtt(sym_t *, sym_t *);
45static	void	outfstrg(strg_t *);
46
47/*
48 * Write type into the output buffer.
49 * The type is written as a sequence of substrings, each of which describes a
50 * node of type type_t
51 * a node is coded as follows:
52 *	char			C
53 *	signed char		s C
54 *	unsigned char		u C
55 *	short			S
56 *	unsigned short		u S
57 *	int			I
58 *	unsigned int		u I
59 *	long			L
60 *	unsigned long		u L
61 *	long long		Q
62 *	unsigned long long	u Q
63 *	float			s D
64 *	double			D
65 *	long double		l D
66 *	void			V
67 *	*			P
68 *	[n]			A n
69 *	()			F
70 *	(void)			F 0
71 *	(n arguments)		F n arg1 arg2 ... argn
72 *	(n arguments, ...)	F n arg1 arg2 ... argn-1 E
73 *	(a, b, c, ...)		f n arg1 arg2 ...
74 *	enum tag		e T tag_or_typename
75 *	struct tag		s T tag_or_typename
76 *	union tag		u T tag_or_typename
77 *
78 *	tag_or_typename		0			no tag or type name
79 *				1 n tag			Tag
80 *				2 n typename		only type name
81 *
82 * spaces are only for better readability
83 * additionaly it is possible to prepend the characters 'c' (for const)
84 * and 'v' (for volatile)
85 */
86void
87outtype(type_t *tp)
88{
89	int	t, s, na;
90	sym_t	*arg;
91	tspec_t	ts;
92
93	while (tp != NULL) {
94		if ((ts = tp->t_tspec) == INT && tp->t_isenum)
95			ts = ENUM;
96		switch (ts) {
97		case CHAR:	t = 'C';	s = '\0';	break;
98		case SCHAR:	t = 'C';	s = 's';	break;
99		case UCHAR:	t = 'C';	s = 'u';	break;
100		case SHORT:	t = 'S';	s = '\0';	break;
101		case USHORT:	t = 'S';	s = 'u';	break;
102		case INT:	t = 'I';	s = '\0';	break;
103		case UINT:	t = 'I';	s = 'u';	break;
104		case LONG:	t = 'L';	s = '\0';	break;
105		case ULONG:	t = 'L';	s = 'u';	break;
106		case QUAD:	t = 'Q';	s = '\0';	break;
107		case UQUAD:	t = 'Q';	s = 'u';	break;
108		case FLOAT:	t = 'D';	s = 's';	break;
109		case DOUBLE:	t = 'D';	s = '\0';	break;
110		case LDOUBLE:	t = 'D';	s = 'l';	break;
111		case VOID:	t = 'V';	s = '\0';	break;
112		case PTR:	t = 'P';	s = '\0';	break;
113		case ARRAY:	t = 'A';	s = '\0';	break;
114		case FUNC:	t = 'F';	s = '\0';	break;
115		case ENUM:	t = 'T';	s = 'e';	break;
116		case STRUCT:	t = 'T';	s = 's';	break;
117		case UNION:	t = 'T';	s = 'u';	break;
118		default:
119			lerror("outtyp() 1");
120		}
121		if (tp->t_const)
122			outchar('c');
123		if (tp->t_volatile)
124			outchar('v');
125		if (s != '\0')
126			outchar(s);
127		outchar(t);
128		if (ts == ARRAY) {
129			outint(tp->t_dim);
130		} else if (ts == ENUM) {
131			outtt(tp->t_enum->etag, tp->t_enum->etdef);
132		} else if (ts == STRUCT || ts == UNION) {
133			outtt(tp->t_str->stag, tp->t_str->stdef);
134		} else if (ts == FUNC && tp->t_proto) {
135			na = 0;
136			for (arg = tp->t_args; arg != NULL; arg = arg->s_nxt)
137					na++;
138			if (tp->t_vararg)
139				na++;
140			outint(na);
141			for (arg = tp->t_args; arg != NULL; arg = arg->s_nxt)
142				outtype(arg->s_type);
143			if (tp->t_vararg)
144				outchar('E');
145		}
146		tp = tp->t_subt;
147	}
148}
149
150/*
151 * type to string
152 * used for debugging output
153 *
154 * it uses its own output buffer for conversion
155 */
156const char *
157ttos(type_t *tp)
158{
159	static	ob_t	tob;
160	ob_t	tmp;
161
162	if (tob.o_buf == NULL) {
163		tob.o_len = 64;
164		tob.o_buf = tob.o_nxt = xmalloc(tob.o_len);
165		tob.o_end = tob.o_buf + tob.o_len;
166	}
167
168	tmp = ob;
169	ob = tob;
170	ob.o_nxt = ob.o_buf;
171	outtype(tp);
172	outchar('\0');
173	tob = ob;
174	ob = tmp;
175
176	return (tob.o_buf);
177}
178
179/*
180 * write the name of a tag or typename
181 *
182 * if the tag is named, the name of the
183 * tag is written, otherwise, if a typename exists which
184 * refers to this tag, this typename is written
185 */
186static void
187outtt(sym_t *tag, sym_t *tdef)
188{
189
190	/*
191	 * 0 is no longer used.
192	 */
193	if (tag->s_name != unnamed) {
194		outint(1);
195		outname(tag->s_name);
196	} else if (tdef != NULL) {
197		outint(2);
198		outname(tdef->s_name);
199	} else {
200		outint(3);
201		outint(tag->s_dpos.p_line);
202		outchar('.');
203		outint(getfnid(tag->s_dpos.p_file));
204		outchar('.');
205		outint(tag->s_dpos.p_uniq);
206	}
207}
208
209/*
210 * write information about an global declared/defined symbol
211 * with storage class extern
212 *
213 * informations about function definitions are written in outfdef(),
214 * not here
215 */
216void
217outsym(sym_t *sym, scl_t sc, def_t def)
218{
219
220	/*
221	 * Static function declarations must also be written to the output
222	 * file. Compatibility of function declarations (for both static
223	 * and extern functions) must be checked in lint2. Lint1 can't do
224	 * this, especially not, if functions are declared at block level
225	 * before their first declaration at level 0.
226	 */
227	if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
228		return;
229
230	/* reset buffer */
231	outclr();
232
233	/*
234	 * line number of .c source, 'd' for declaration, Id of current
235	 * source (.c or .h), and line in current source.
236	 */
237	outint(csrc_pos.p_line);
238	outchar('d');
239	outint(getfnid(sym->s_dpos.p_file));
240	outchar('.');
241	outint(sym->s_dpos.p_line);
242
243	/* flags */
244
245	switch (def) {
246	case DEF:
247		/* defined */
248		outchar('d');
249		break;
250	case TDEF:
251		/* tentative defined */
252		outchar('t');
253		break;
254	case DECL:
255		/* declared */
256		outchar('e');
257		break;
258	default:
259		lerror("outsym() 2");
260	}
261	if (llibflg && def != DECL) {
262		/*
263		 * mark it as used so we get no warnings from lint2 about
264		 * unused symbols in libraries.
265		 */
266		outchar('u');
267	}
268
269	if (sc == STATIC)
270		outchar('s');
271
272	/* name of the symbol */
273	outname(sym->s_name);
274
275	/* renamed name of symbol, if necessary */
276	if (sym->s_rename) {
277		outchar('r');
278		outname(sym->s_rename);
279	}
280
281	/* type of the symbol */
282	outtype(sym->s_type);
283}
284
285/*
286 * write information about function definition
287 *
288 * this is also done for static functions so we are able to check if
289 * they are called with proper argument types
290 */
291void
292outfdef(sym_t *fsym, pos_t *posp, int rval, int osdef, sym_t *args)
293{
294	int	narg;
295	sym_t	*arg;
296
297	/* reset the buffer */
298	outclr();
299
300	/*
301	 * line number of .c source, 'd' for declaration, Id of current
302	 * source (.c or .h), and line in current source
303	 *
304	 * we are already at the end of the function. If we are in the
305	 * .c source, posp->p_line is correct, otherwise csrc_pos.p_line
306	 * (for functions defined in header files).
307	 */
308	if (posp->p_file == csrc_pos.p_file) {
309		outint(posp->p_line);
310	} else {
311		outint(csrc_pos.p_line);
312	}
313	outchar('d');
314	outint(getfnid(posp->p_file));
315	outchar('.');
316	outint(posp->p_line);
317
318	/* flags */
319
320	/* both SCANFLIKE and PRINTFLIKE imply VARARGS */
321	if (prflstrg != -1) {
322		nvararg = prflstrg;
323	} else if (scflstrg != -1) {
324		nvararg = scflstrg;
325	}
326
327	if (nvararg != -1) {
328		outchar('v');
329		outint(nvararg);
330	}
331	if (scflstrg != -1) {
332		outchar('S');
333		outint(scflstrg);
334	}
335	if (prflstrg != -1) {
336		outchar('P');
337		outint(prflstrg);
338	}
339	nvararg = prflstrg = scflstrg = -1;
340
341	outchar('d');
342
343	if (rval)
344		/* has return value */
345		outchar('r');
346
347	if (llibflg)
348		/*
349		 * mark it as used so lint2 does not complain about
350		 * unused symbols in libraries
351		 */
352		outchar('u');
353
354	if (osdef)
355		/* old style function definition */
356		outchar('o');
357
358	if (fsym->s_scl == STATIC)
359		outchar('s');
360
361	/* name of function */
362	outname(fsym->s_name);
363
364	/* renamed name of function, if necessary */
365	if (fsym->s_rename) {
366		outchar('r');
367		outname(fsym->s_rename);
368	}
369
370	/* argument types and return value */
371	if (osdef) {
372		narg = 0;
373		for (arg = args; arg != NULL; arg = arg->s_nxt)
374			narg++;
375		outchar('f');
376		outint(narg);
377		for (arg = args; arg != NULL; arg = arg->s_nxt)
378			outtype(arg->s_type);
379		outtype(fsym->s_type->t_subt);
380	} else {
381		outtype(fsym->s_type);
382	}
383}
384
385/*
386 * write out all information necessary for lint2 to check function
387 * calls
388 *
389 * rvused is set if the return value is used (asigned to a variable)
390 * rvdisc is set if the return value is not used and not ignored
391 * (casted to void)
392 */
393void
394outcall(tnode_t *tn, int rvused, int rvdisc)
395{
396	tnode_t	*args, *arg;
397	int	narg, n, i;
398	int64_t	q;
399	tspec_t	t;
400
401	/* reset buffer */
402	outclr();
403
404	/*
405	 * line number of .c source, 'c' for function call, Id of current
406	 * source (.c or .h), and line in current source
407	 */
408	outint(csrc_pos.p_line);
409	outchar('c');
410	outint(getfnid(curr_pos.p_file));
411	outchar('.');
412	outint(curr_pos.p_line);
413
414	/*
415	 * flags; 'u' and 'i' must be last to make sure a letter
416	 * is between the numeric argument of a flag and the name of
417	 * the function
418	 */
419	narg = 0;
420	args = tn->tn_right;
421	for (arg = args; arg != NULL; arg = arg->tn_right)
422		narg++;
423	/* informations about arguments */
424	for (n = 1; n <= narg; n++) {
425		/* the last argument is the top one in the tree */
426		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
427			continue;
428		arg = arg->tn_left;
429		if (arg->tn_op == CON) {
430			if (isityp(t = arg->tn_type->t_tspec)) {
431				/*
432				 * XXX it would probably be better to
433				 * explizitly test the sign
434				 */
435				if ((q = arg->tn_val->v_quad) == 0) {
436					/* zero constant */
437					outchar('z');
438				} else if (msb(q, t, 0) == 0) {
439					/* positive if casted to signed */
440					outchar('p');
441				} else {
442					/* negative if casted to signed */
443					outchar('n');
444				}
445				outint(n);
446			}
447		} else if (arg->tn_op == AMPER &&
448			   arg->tn_left->tn_op == STRING &&
449			   arg->tn_left->tn_strg->st_tspec == CHAR) {
450			/* constant string, write all format specifiers */
451			outchar('s');
452			outint(n);
453			outfstrg(arg->tn_left->tn_strg);
454		}
455
456	}
457	/* return value discarded/used/ignored */
458	outchar(rvdisc ? 'd' : (rvused ? 'u' : 'i'));
459
460	/* name of the called function */
461	outname(tn->tn_left->tn_left->tn_sym->s_name);
462
463	/* types of arguments */
464	outchar('f');
465	outint(narg);
466	for (n = 1; n <= narg; n++) {
467		/* the last argument is the top one in the tree */
468		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
469			continue;
470		outtype(arg->tn_left->tn_type);
471	}
472	/* expected type of return value */
473	outtype(tn->tn_type);
474}
475
476/*
477 * extracts potential format specifiers for printf() and scanf() and
478 * writes them, enclosed in "" and qouted if necessary, to the output buffer
479 */
480static void
481outfstrg(strg_t *strg)
482{
483	int	c, oc, first;
484	u_char	*cp;
485
486	if (strg->st_tspec != CHAR)
487		lerror("outfstrg() 1");
488
489	cp = strg->st_cp;
490
491	outchar('"');
492
493	c = *cp++;
494
495	while (c != '\0') {
496
497		if (c != '%') {
498			c = *cp++;
499			continue;
500		}
501
502		outqchar('%');
503		c = *cp++;
504
505		/* flags for printf and scanf and *-fieldwidth for printf */
506		while (c != '\0' && (c == '-' || c == '+' || c == ' ' ||
507				     c == '#' || c == '0' || c == '*')) {
508			outqchar(c);
509			c = *cp++;
510		}
511
512		/* numeric field width */
513		while (c != '\0' && isdigit(c)) {
514			outqchar(c);
515			c = *cp++;
516		}
517
518		/* precision for printf */
519		if (c == '.') {
520			outqchar(c);
521			if ((c = *cp++) == '*') {
522				outqchar(c);
523				c = *cp++;
524			} else {
525				while (c != '\0' && isdigit(c)) {
526					outqchar(c);
527					c = *cp++;
528				}
529			}
530		}
531
532		/* h, l, L and q flags fpr printf and scanf */
533		if (c == 'h' || c == 'l' || c == 'L' || c == 'q') {
534			outqchar(c);
535			c = *cp++;
536		}
537
538		/*
539		 * The last character. It is always written so we can detect
540		 * invalid format specifiers.
541		 */
542		if (c != '\0') {
543			outqchar(c);
544			oc = c;
545			c = *cp++;
546			/*
547			 * handle [ for scanf. [-] means that a minus sign
548			 * was found at an undefined position.
549			 */
550			if (oc == '[') {
551				if (c == '^')
552					c = *cp++;
553				if (c == ']')
554					c = *cp++;
555				first = 1;
556				while (c != '\0' && c != ']') {
557					if (c == '-') {
558						if (!first && *cp != ']')
559							outqchar(c);
560					}
561					first = 0;
562					c = *cp++;
563				}
564				if (c == ']') {
565					outqchar(c);
566					c = *cp++;
567				}
568			}
569		}
570
571	}
572
573	outchar('"');
574}
575
576/*
577 * writes a record if sym was used
578 */
579void
580outusg(sym_t *sym)
581{
582	/* reset buffer */
583	outclr();
584
585	/*
586	 * line number of .c source, 'u' for used, Id of current
587	 * source (.c or .h), and line in current source
588	 */
589	outint(csrc_pos.p_line);
590	outchar('u');
591	outint(getfnid(curr_pos.p_file));
592	outchar('.');
593	outint(curr_pos.p_line);
594
595	/* necessary to delimit both numbers */
596	outchar('x');
597
598	/* Den Namen des Symbols ausgeben */
599	outname(sym->s_name);
600}
601