fbt.c revision 285004
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 *
21 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22 *
23 * $FreeBSD: head/sys/cddl/dev/fbt/fbt.c 285004 2015-07-01 14:09:59Z br $
24 *
25 */
26
27/*
28 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29 * Use is subject to license terms.
30 */
31
32#include <sys/cdefs.h>
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/conf.h>
36#include <sys/cpuvar.h>
37#include <sys/fcntl.h>
38#include <sys/filio.h>
39#include <sys/kdb.h>
40#include <sys/kernel.h>
41#include <sys/kmem.h>
42#include <sys/kthread.h>
43#include <sys/limits.h>
44#include <sys/linker.h>
45#include <sys/lock.h>
46#include <sys/malloc.h>
47#include <sys/module.h>
48#include <sys/mutex.h>
49#include <sys/pcpu.h>
50#include <sys/poll.h>
51#include <sys/proc.h>
52#include <sys/selinfo.h>
53#include <sys/smp.h>
54#include <sys/syscall.h>
55#include <sys/sysent.h>
56#include <sys/sysproto.h>
57#include <sys/uio.h>
58#include <sys/unistd.h>
59#include <machine/stdarg.h>
60
61#include <sys/dtrace.h>
62#include <sys/dtrace_bsd.h>
63
64#include "fbt.h"
65
/* Malloc type used for the allocations made by the FBT provider in this file. */
MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/* Provider id; filled in by dtrace_register() in fbt_load(). */
dtrace_provider_id_t	fbt_id;
/* Probe hash table; buckets are selected with FBT_ADDR2NDX(patchpoint). */
fbt_probe_t		**fbt_probetab;
/* Set to fbt_probetab_size - 1 in fbt_load(); presumably consumed by FBT_ADDR2NDX. */
int			fbt_probetab_mask;
71
/*
 * Forward declarations: the character-device open routine, the load and
 * unload hooks, and the provider operations that are referenced by the
 * tables below before their definitions appear.
 */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);
82
/*
 * Character device switch for the node created by make_dev() in
 * fbt_load().  Only an open routine is supplied.
 */
static struct cdevsw fbt_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= fbt_open,
	.d_name		= "fbt",
};
88
/*
 * Stability attributes passed to dtrace_register() in fbt_load().
 * NOTE(review): each row looks like a (name stability, data stability,
 * dependency class) triple and the five rows presumably describe the
 * provider, module, function, name and arguments in that order -- confirm
 * against the dtrace_pattr_t definition.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
96
/*
 * Provider operations vector passed to dtrace_register() in fbt_load().
 * The initialisers are positional, so their order must match the field
 * order of dtrace_pops_t.  NOTE(review): the NULL slots are operations
 * this provider does not implement; which ones they are depends on the
 * dtrace_pops_t layout, which is not visible here -- confirm before
 * reordering.
 */
static dtrace_pops_t fbt_pops = {
	NULL,
	fbt_provide_module,
	fbt_enable,
	fbt_disable,
	fbt_suspend,
	fbt_resume,
	fbt_getargdesc,
	NULL,
	NULL,
	fbt_destroy
};
109
/* Device node returned by make_dev() in fbt_load(); destroyed in fbt_unload(). */
static struct cdev		*fbt_cdev;
/* Number of buckets in fbt_probetab; fbt_load() substitutes FBT_PROBETAB_SIZE for 0. */
static int			fbt_probetab_size;
/* When non-zero, fbt_enable() reports probes it declines to enable. */
static int			fbt_verbose = 0;
113
/*
 * Decide whether a function must be left uninstrumented.
 *
 * Returns 1 when the symbol name is off limits and 0 when it may be
 * traced.  A name is off limits when:
 *   - it starts with "dtrace_" but not with "dtrace_safe_" (such functions
 *     may run in probe context unless they opt out with the "safe" prefix);
 *   - it starts with two underscores (internal functions);
 *   - DTrace is not built as a module and the name starts with "fbt_".
 */
int
fbt_excluded(const char *name)
{
	static const char dt_prefix[] = "dtrace_";
	static const char dt_safe_prefix[] = "dtrace_safe_";
	int probe_ctx, opted_out;

	probe_ctx = (strncmp(name, dt_prefix, sizeof(dt_prefix) - 1) == 0);
	opted_out = (strncmp(name, dt_safe_prefix,
	    sizeof(dt_safe_prefix) - 1) == 0);
	if (probe_ctx && !opted_out)
		return (1);

	/* Double-underscore names are treated as internal. */
	if (name[0] == '_' && name[1] == '_')
		return (1);

#ifndef _KLD_MODULE
	/* Built into the kernel: never instrument the provider itself. */
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
144
145static void
146fbt_doubletrap(void)
147{
148	fbt_probe_t *fbt;
149	int i;
150
151	for (i = 0; i < fbt_probetab_size; i++) {
152		fbt = fbt_probetab[i];
153
154		for (; fbt != NULL; fbt = fbt->fbtp_next)
155			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
156	}
157}
158
159static void
160fbt_provide_module(void *arg, modctl_t *lf)
161{
162	char modname[MAXPATHLEN];
163	int i;
164	size_t len;
165
166	strlcpy(modname, lf->filename, sizeof(modname));
167	len = strlen(modname);
168	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
169		modname[len - 3] = '\0';
170
171	/*
172	 * Employees of dtrace and their families are ineligible.  Void
173	 * where prohibited.
174	 */
175	if (strcmp(modname, "dtrace") == 0)
176		return;
177
178	/*
179	 * To register with DTrace, a module must list 'dtrace' as a
180	 * dependency in order for the kernel linker to resolve
181	 * symbols like dtrace_register(). All modules with such a
182	 * dependency are ineligible for FBT tracing.
183	 */
184	for (i = 0; i < lf->ndeps; i++)
185		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
186			return;
187
188	if (lf->fbt_nentries) {
189		/*
190		 * This module has some FBT entries allocated; we're afraid
191		 * to screw with it.
192		 */
193		return;
194	}
195
196	/*
197	 * List the functions in the module and the symbol values.
198	 */
199	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
200}
201
202static void
203fbt_destroy(void *arg, dtrace_id_t id, void *parg)
204{
205	fbt_probe_t *fbt = parg, *next, *hash, *last;
206	modctl_t *ctl;
207	int ndx;
208
209	do {
210		ctl = fbt->fbtp_ctl;
211
212		ctl->fbt_nentries--;
213
214		/*
215		 * Now we need to remove this probe from the fbt_probetab.
216		 */
217		ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
218		last = NULL;
219		hash = fbt_probetab[ndx];
220
221		while (hash != fbt) {
222			ASSERT(hash != NULL);
223			last = hash;
224			hash = hash->fbtp_hashnext;
225		}
226
227		if (last != NULL) {
228			last->fbtp_hashnext = fbt->fbtp_hashnext;
229		} else {
230			fbt_probetab[ndx] = fbt->fbtp_hashnext;
231		}
232
233		next = fbt->fbtp_next;
234		free(fbt, M_FBT);
235
236		fbt = next;
237	} while (fbt != NULL);
238}
239
240static void
241fbt_enable(void *arg, dtrace_id_t id, void *parg)
242{
243	fbt_probe_t *fbt = parg;
244	modctl_t *ctl = fbt->fbtp_ctl;
245
246	ctl->nenabled++;
247
248	/*
249	 * Now check that our modctl has the expected load count.  If it
250	 * doesn't, this module must have been unloaded and reloaded -- and
251	 * we're not going to touch it.
252	 */
253	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
254		if (fbt_verbose) {
255			printf("fbt is failing for probe %s "
256			    "(module %s reloaded)",
257			    fbt->fbtp_name, ctl->filename);
258		}
259
260		return;
261	}
262
263	for (; fbt != NULL; fbt = fbt->fbtp_next)
264		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
265}
266
267static void
268fbt_disable(void *arg, dtrace_id_t id, void *parg)
269{
270	fbt_probe_t *fbt = parg;
271	modctl_t *ctl = fbt->fbtp_ctl;
272
273	ASSERT(ctl->nenabled > 0);
274	ctl->nenabled--;
275
276	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
277		return;
278
279	for (; fbt != NULL; fbt = fbt->fbtp_next)
280		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
281}
282
283static void
284fbt_suspend(void *arg, dtrace_id_t id, void *parg)
285{
286	fbt_probe_t *fbt = parg;
287	modctl_t *ctl = fbt->fbtp_ctl;
288
289	ASSERT(ctl->nenabled > 0);
290
291	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
292		return;
293
294	for (; fbt != NULL; fbt = fbt->fbtp_next)
295		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
296}
297
298static void
299fbt_resume(void *arg, dtrace_id_t id, void *parg)
300{
301	fbt_probe_t *fbt = parg;
302	modctl_t *ctl = fbt->fbtp_ctl;
303
304	ASSERT(ctl->nenabled > 0);
305
306	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
307		return;
308
309	for (; fbt != NULL; fbt = fbt->fbtp_next)
310		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
311}
312
313static int
314fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
315{
316	const Elf_Sym *symp = lc->symtab;;
317	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
318	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
319	int i;
320	uint32_t *ctfoff;
321	uint32_t objtoff = hp->cth_objtoff;
322	uint32_t funcoff = hp->cth_funcoff;
323	ushort_t info;
324	ushort_t vlen;
325
326	/* Sanity check. */
327	if (hp->cth_magic != CTF_MAGIC) {
328		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
329		return (EINVAL);
330	}
331
332	if (lc->symtab == NULL) {
333		printf("No symbol table in '%s'\n",lf->pathname);
334		return (EINVAL);
335	}
336
337	if ((ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK)) == NULL)
338		return (ENOMEM);
339
340	*lc->ctfoffp = ctfoff;
341
342	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
343		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
344			*ctfoff = 0xffffffff;
345			continue;
346		}
347
348		switch (ELF_ST_TYPE(symp->st_info)) {
349		case STT_OBJECT:
350			if (objtoff >= hp->cth_funcoff ||
351                            (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
352				*ctfoff = 0xffffffff;
353                                break;
354                        }
355
356                        *ctfoff = objtoff;
357                        objtoff += sizeof (ushort_t);
358			break;
359
360		case STT_FUNC:
361			if (funcoff >= hp->cth_typeoff) {
362				*ctfoff = 0xffffffff;
363				break;
364			}
365
366			*ctfoff = funcoff;
367
368			info = *((const ushort_t *)(ctfdata + funcoff));
369			vlen = CTF_INFO_VLEN(info);
370
371			/*
372			 * If we encounter a zero pad at the end, just skip it.
373			 * Otherwise skip over the function and its return type
374			 * (+2) and the argument list (vlen).
375			 */
376			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
377				funcoff += sizeof (ushort_t); /* skip pad */
378			else
379				funcoff += sizeof (ushort_t) * (vlen + 2);
380			break;
381
382		default:
383			*ctfoff = 0xffffffff;
384			break;
385		}
386	}
387
388	return (0);
389}
390
391static ssize_t
392fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
393    ssize_t *incrementp)
394{
395	ssize_t size, increment;
396
397	if (version > CTF_VERSION_1 &&
398	    tp->ctt_size == CTF_LSIZE_SENT) {
399		size = CTF_TYPE_LSIZE(tp);
400		increment = sizeof (ctf_type_t);
401	} else {
402		size = tp->ctt_size;
403		increment = sizeof (ctf_stype_t);
404	}
405
406	if (sizep)
407		*sizep = size;
408	if (incrementp)
409		*incrementp = increment;
410
411	return (size);
412}
413
414static int
415fbt_typoff_init(linker_ctf_t *lc)
416{
417	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
418	const ctf_type_t *tbuf;
419	const ctf_type_t *tend;
420	const ctf_type_t *tp;
421	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
422	int ctf_typemax = 0;
423	uint32_t *xp;
424	ulong_t pop[CTF_K_MAX + 1] = { 0 };
425
426
427	/* Sanity check. */
428	if (hp->cth_magic != CTF_MAGIC)
429		return (EINVAL);
430
431	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
432	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
433
434	int child = hp->cth_parname != 0;
435
436	/*
437	 * We make two passes through the entire type section.  In this first
438	 * pass, we count the number of each type and the total number of types.
439	 */
440	for (tp = tbuf; tp < tend; ctf_typemax++) {
441		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
442		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
443		ssize_t size, increment;
444
445		size_t vbytes;
446		uint_t n;
447
448		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
449
450		switch (kind) {
451		case CTF_K_INTEGER:
452		case CTF_K_FLOAT:
453			vbytes = sizeof (uint_t);
454			break;
455		case CTF_K_ARRAY:
456			vbytes = sizeof (ctf_array_t);
457			break;
458		case CTF_K_FUNCTION:
459			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
460			break;
461		case CTF_K_STRUCT:
462		case CTF_K_UNION:
463			if (size < CTF_LSTRUCT_THRESH) {
464				ctf_member_t *mp = (ctf_member_t *)
465				    ((uintptr_t)tp + increment);
466
467				vbytes = sizeof (ctf_member_t) * vlen;
468				for (n = vlen; n != 0; n--, mp++)
469					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
470			} else {
471				ctf_lmember_t *lmp = (ctf_lmember_t *)
472				    ((uintptr_t)tp + increment);
473
474				vbytes = sizeof (ctf_lmember_t) * vlen;
475				for (n = vlen; n != 0; n--, lmp++)
476					child |=
477					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
478			}
479			break;
480		case CTF_K_ENUM:
481			vbytes = sizeof (ctf_enum_t) * vlen;
482			break;
483		case CTF_K_FORWARD:
484			/*
485			 * For forward declarations, ctt_type is the CTF_K_*
486			 * kind for the tag, so bump that population count too.
487			 * If ctt_type is unknown, treat the tag as a struct.
488			 */
489			if (tp->ctt_type == CTF_K_UNKNOWN ||
490			    tp->ctt_type >= CTF_K_MAX)
491				pop[CTF_K_STRUCT]++;
492			else
493				pop[tp->ctt_type]++;
494			/*FALLTHRU*/
495		case CTF_K_UNKNOWN:
496			vbytes = 0;
497			break;
498		case CTF_K_POINTER:
499		case CTF_K_TYPEDEF:
500		case CTF_K_VOLATILE:
501		case CTF_K_CONST:
502		case CTF_K_RESTRICT:
503			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
504			vbytes = 0;
505			break;
506		default:
507			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
508			return (EIO);
509		}
510		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
511		pop[kind]++;
512	}
513
514	/* account for a sentinel value below */
515	ctf_typemax++;
516	*lc->typlenp = ctf_typemax;
517
518	if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER, M_ZERO | M_WAITOK)) == NULL)
519		return (ENOMEM);
520
521	*lc->typoffp = xp;
522
523	/* type id 0 is used as a sentinel value */
524	*xp++ = 0;
525
526	/*
527	 * In the second pass, fill in the type offset.
528	 */
529	for (tp = tbuf; tp < tend; xp++) {
530		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
531		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
532		ssize_t size, increment;
533
534		size_t vbytes;
535		uint_t n;
536
537		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
538
539		switch (kind) {
540		case CTF_K_INTEGER:
541		case CTF_K_FLOAT:
542			vbytes = sizeof (uint_t);
543			break;
544		case CTF_K_ARRAY:
545			vbytes = sizeof (ctf_array_t);
546			break;
547		case CTF_K_FUNCTION:
548			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
549			break;
550		case CTF_K_STRUCT:
551		case CTF_K_UNION:
552			if (size < CTF_LSTRUCT_THRESH) {
553				ctf_member_t *mp = (ctf_member_t *)
554				    ((uintptr_t)tp + increment);
555
556				vbytes = sizeof (ctf_member_t) * vlen;
557				for (n = vlen; n != 0; n--, mp++)
558					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
559			} else {
560				ctf_lmember_t *lmp = (ctf_lmember_t *)
561				    ((uintptr_t)tp + increment);
562
563				vbytes = sizeof (ctf_lmember_t) * vlen;
564				for (n = vlen; n != 0; n--, lmp++)
565					child |=
566					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
567			}
568			break;
569		case CTF_K_ENUM:
570			vbytes = sizeof (ctf_enum_t) * vlen;
571			break;
572		case CTF_K_FORWARD:
573		case CTF_K_UNKNOWN:
574			vbytes = 0;
575			break;
576		case CTF_K_POINTER:
577		case CTF_K_TYPEDEF:
578		case CTF_K_VOLATILE:
579		case CTF_K_CONST:
580		case CTF_K_RESTRICT:
581			vbytes = 0;
582			break;
583		default:
584			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
585			return (EIO);
586		}
587		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
588		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
589	}
590
591	return (0);
592}
593
/*
 * CTF Declaration Stack
 *
 * In order to implement fbt_type_name(), we must convert a type graph back
 * into a C type declaration.  Unfortunately, a type graph represents a storage
 * class ordering of the type whereas a type declaration must obey the C rules
 * for operator precedence, and the two orderings are frequently in conflict.
 * For example, consider these CTF type graphs and their C declarations:
 *
 * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
 * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
 *
 * In each case, parentheses are used to raise operator * to higher lexical
 * precedence, so the string form of the C declaration cannot be constructed by
 * walking the type graph links and forming the string from left to right.
 *
 * The ctf_decl_*() functions below build a set of stacks from the type graph
 * nodes corresponding to the C operator precedence levels in the appropriate
 * order.  The code in fbt_type_name() can then iterate over the levels and
 * nodes in lexical precedence order and construct the final C declaration
 * string.
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

/* Accessors for an element that embeds a ctf_list_t as its first member. */
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))

/* Declarator precedence levels; CTF_PREC_MAX is the number of levels. */
typedef enum {
	CTF_PREC_BASE,
	CTF_PREC_POINTER,
	CTF_PREC_ARRAY,
	CTF_PREC_FUNCTION,
	CTF_PREC_MAX
} ctf_decl_prec_t;

/* One type-graph node queued on a precedence level's list. */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;

/* Working state for turning one type graph into a declaration string. */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* precedence level for qualifiers */
	ctf_decl_prec_t cd_ordp;		/* next storage-order index */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
649
650/*
651 * Simple doubly-linked list append routine.  This implementation assumes that
652 * each list element contains an embedded ctf_list_t as the first member.
653 * An additional ctf_list_t is used to store the head (l_next) and tail
654 * (l_prev) pointers.  The current head and tail list elements have their
655 * previous and next pointers set to NULL, respectively.
656 */
657static void
658ctf_list_append(ctf_list_t *lp, void *new)
659{
660	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
661	ctf_list_t *q = new;		/* q = new list element */
662
663	lp->l_prev = q;
664	q->l_prev = p;
665	q->l_next = NULL;
666
667	if (p != NULL)
668		p->l_next = q;
669	else
670		lp->l_next = q;
671}
672
673/*
674 * Prepend the specified existing element to the given ctf_list_t.  The
675 * existing pointer should be pointing at a struct with embedded ctf_list_t.
676 */
677static void
678ctf_list_prepend(ctf_list_t *lp, void *new)
679{
680	ctf_list_t *p = new;		/* p = new list element */
681	ctf_list_t *q = lp->l_next;	/* q = head list element */
682
683	lp->l_next = p;
684	p->l_prev = NULL;
685	p->l_next = q;
686
687	if (q != NULL)
688		q->l_prev = p;
689	else
690		lp->l_prev = p;
691}
692
693static void
694ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
695{
696	int i;
697
698	bzero(cd, sizeof (ctf_decl_t));
699
700	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
701		cd->cd_order[i] = CTF_PREC_BASE - 1;
702
703	cd->cd_qualp = CTF_PREC_BASE;
704	cd->cd_ordp = CTF_PREC_BASE;
705
706	cd->cd_buf = buf;
707	cd->cd_ptr = buf;
708	cd->cd_end = buf + len;
709}
710
711static void
712ctf_decl_fini(ctf_decl_t *cd)
713{
714	ctf_decl_node_t *cdp, *ndp;
715	int i;
716
717	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
718		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
719		    cdp != NULL; cdp = ndp) {
720			ndp = ctf_list_next(cdp);
721			free(cdp, M_FBT);
722		}
723	}
724}
725
726static const ctf_type_t *
727ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
728{
729	const ctf_type_t *tp;
730	uint32_t offset;
731	uint32_t *typoff = *lc->typoffp;
732
733	if (type >= *lc->typlenp) {
734		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
735		return(NULL);
736	}
737
738	/* Check if the type isn't cross-referenced. */
739	if ((offset = typoff[type]) == 0) {
740		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
741		return(NULL);
742	}
743
744	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
745
746	return (tp);
747}
748
749static void
750fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
751{
752	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
753	const ctf_type_t *tp;
754	const ctf_array_t *ap;
755	ssize_t increment;
756
757	bzero(arp, sizeof(*arp));
758
759	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
760		return;
761
762	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
763		return;
764
765	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
766
767	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
768	arp->ctr_contents = ap->cta_contents;
769	arp->ctr_index = ap->cta_index;
770	arp->ctr_nelems = ap->cta_nelems;
771}
772
773static const char *
774ctf_strptr(linker_ctf_t *lc, int name)
775{
776	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
777	const char *strp = "";
778
779	if (name < 0 || name >= hp->cth_strlen)
780		return(strp);
781
782	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
783
784	return (strp);
785}
786
787static void
788ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
789{
790	ctf_decl_node_t *cdp;
791	ctf_decl_prec_t prec;
792	uint_t kind, n = 1;
793	int is_qual = 0;
794
795	const ctf_type_t *tp;
796	ctf_arinfo_t ar;
797
798	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
799		cd->cd_err = ENOENT;
800		return;
801	}
802
803	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
804	case CTF_K_ARRAY:
805		fbt_array_info(lc, type, &ar);
806		ctf_decl_push(cd, lc, ar.ctr_contents);
807		n = ar.ctr_nelems;
808		prec = CTF_PREC_ARRAY;
809		break;
810
811	case CTF_K_TYPEDEF:
812		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
813			ctf_decl_push(cd, lc, tp->ctt_type);
814			return;
815		}
816		prec = CTF_PREC_BASE;
817		break;
818
819	case CTF_K_FUNCTION:
820		ctf_decl_push(cd, lc, tp->ctt_type);
821		prec = CTF_PREC_FUNCTION;
822		break;
823
824	case CTF_K_POINTER:
825		ctf_decl_push(cd, lc, tp->ctt_type);
826		prec = CTF_PREC_POINTER;
827		break;
828
829	case CTF_K_VOLATILE:
830	case CTF_K_CONST:
831	case CTF_K_RESTRICT:
832		ctf_decl_push(cd, lc, tp->ctt_type);
833		prec = cd->cd_qualp;
834		is_qual++;
835		break;
836
837	default:
838		prec = CTF_PREC_BASE;
839	}
840
841	if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) {
842		cd->cd_err = EAGAIN;
843		return;
844	}
845
846	cdp->cd_type = type;
847	cdp->cd_kind = kind;
848	cdp->cd_n = n;
849
850	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
851		cd->cd_order[prec] = cd->cd_ordp++;
852
853	/*
854	 * Reset cd_qualp to the highest precedence level that we've seen so
855	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
856	 */
857	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
858		cd->cd_qualp = prec;
859
860	/*
861	 * C array declarators are ordered inside out so prepend them.  Also by
862	 * convention qualifiers of base types precede the type specifier (e.g.
863	 * const int vs. int const) even though the two forms are equivalent.
864	 */
865	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
866		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
867	else
868		ctf_list_append(&cd->cd_nodes[prec], cdp);
869}
870
871static void
872ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
873{
874	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
875	va_list ap;
876	size_t n;
877
878	va_start(ap, format);
879	n = vsnprintf(cd->cd_ptr, len, format, ap);
880	va_end(ap);
881
882	cd->cd_ptr += MIN(n, len);
883	cd->cd_len += n;
884}
885
886static ssize_t
887fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
888{
889	ctf_decl_t cd;
890	ctf_decl_node_t *cdp;
891	ctf_decl_prec_t prec, lp, rp;
892	int ptr, arr;
893	uint_t k;
894
895	if (lc == NULL && type == CTF_ERR)
896		return (-1); /* simplify caller code by permitting CTF_ERR */
897
898	ctf_decl_init(&cd, buf, len);
899	ctf_decl_push(&cd, lc, type);
900
901	if (cd.cd_err != 0) {
902		ctf_decl_fini(&cd);
903		return (-1);
904	}
905
906	/*
907	 * If the type graph's order conflicts with lexical precedence order
908	 * for pointers or arrays, then we need to surround the declarations at
909	 * the corresponding lexical precedence with parentheses.  This can
910	 * result in either a parenthesized pointer (*) as in int (*)() or
911	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
912	 */
913	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
914	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
915
916	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
917	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
918
919	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
920
921	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
922		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
923		    cdp != NULL; cdp = ctf_list_next(cdp)) {
924
925			const ctf_type_t *tp =
926			    ctf_lookup_by_id(lc, cdp->cd_type);
927			const char *name = ctf_strptr(lc, tp->ctt_name);
928
929			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
930				ctf_decl_sprintf(&cd, " ");
931
932			if (lp == prec) {
933				ctf_decl_sprintf(&cd, "(");
934				lp = -1;
935			}
936
937			switch (cdp->cd_kind) {
938			case CTF_K_INTEGER:
939			case CTF_K_FLOAT:
940			case CTF_K_TYPEDEF:
941				ctf_decl_sprintf(&cd, "%s", name);
942				break;
943			case CTF_K_POINTER:
944				ctf_decl_sprintf(&cd, "*");
945				break;
946			case CTF_K_ARRAY:
947				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
948				break;
949			case CTF_K_FUNCTION:
950				ctf_decl_sprintf(&cd, "()");
951				break;
952			case CTF_K_STRUCT:
953			case CTF_K_FORWARD:
954				ctf_decl_sprintf(&cd, "struct %s", name);
955				break;
956			case CTF_K_UNION:
957				ctf_decl_sprintf(&cd, "union %s", name);
958				break;
959			case CTF_K_ENUM:
960				ctf_decl_sprintf(&cd, "enum %s", name);
961				break;
962			case CTF_K_VOLATILE:
963				ctf_decl_sprintf(&cd, "volatile");
964				break;
965			case CTF_K_CONST:
966				ctf_decl_sprintf(&cd, "const");
967				break;
968			case CTF_K_RESTRICT:
969				ctf_decl_sprintf(&cd, "restrict");
970				break;
971			}
972
973			k = cdp->cd_kind;
974		}
975
976		if (rp == prec)
977			ctf_decl_sprintf(&cd, ")");
978	}
979
980	ctf_decl_fini(&cd);
981	return (cd.cd_len);
982}
983
984static void
985fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
986{
987	const ushort_t *dp;
988	fbt_probe_t *fbt = parg;
989	linker_ctf_t lc;
990	modctl_t *ctl = fbt->fbtp_ctl;
991	int ndx = desc->dtargd_ndx;
992	int symindx = fbt->fbtp_symindx;
993	uint32_t *ctfoff;
994	uint32_t offset;
995	ushort_t info, kind, n;
996
997	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
998		(void) strcpy(desc->dtargd_native, "int");
999		return;
1000	}
1001
1002	desc->dtargd_ndx = DTRACE_ARGNONE;
1003
1004	/* Get a pointer to the CTF data and it's length. */
1005	if (linker_ctf_get(ctl, &lc) != 0)
1006		/* No CTF data? Something wrong? *shrug* */
1007		return;
1008
1009	/* Check if this module hasn't been initialised yet. */
1010	if (*lc.ctfoffp == NULL) {
1011		/*
1012		 * Initialise the CTF object and function symindx to
1013		 * byte offset array.
1014		 */
1015		if (fbt_ctfoff_init(ctl, &lc) != 0)
1016			return;
1017
1018		/* Initialise the CTF type to byte offset array. */
1019		if (fbt_typoff_init(&lc) != 0)
1020			return;
1021	}
1022
1023	ctfoff = *lc.ctfoffp;
1024
1025	if (ctfoff == NULL || *lc.typoffp == NULL)
1026		return;
1027
1028	/* Check if the symbol index is out of range. */
1029	if (symindx >= lc.nsym)
1030		return;
1031
1032	/* Check if the symbol isn't cross-referenced. */
1033	if ((offset = ctfoff[symindx]) == 0xffffffff)
1034		return;
1035
1036	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1037
1038	info = *dp++;
1039	kind = CTF_INFO_KIND(info);
1040	n = CTF_INFO_VLEN(info);
1041
1042	if (kind == CTF_K_UNKNOWN && n == 0) {
1043		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1044		return;
1045	}
1046
1047	if (kind != CTF_K_FUNCTION) {
1048		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1049		return;
1050	}
1051
1052	if (fbt->fbtp_roffset != 0) {
1053		/* Only return type is available for args[1] in return probe. */
1054		if (ndx > 1)
1055			return;
1056		ASSERT(ndx == 1);
1057	} else {
1058		/* Check if the requested argument doesn't exist. */
1059		if (ndx >= n)
1060			return;
1061
1062		/* Skip the return type and arguments up to the one requested. */
1063		dp += ndx + 1;
1064	}
1065
1066	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1067		desc->dtargd_ndx = ndx;
1068
1069	return;
1070}
1071
1072static int
1073fbt_linker_file_cb(linker_file_t lf, void *arg)
1074{
1075
1076	fbt_provide_module(arg, lf);
1077
1078	return (0);
1079}
1080
1081static void
1082fbt_load(void *dummy)
1083{
1084	/* Create the /dev/dtrace/fbt entry. */
1085	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1086	    "dtrace/fbt");
1087
1088	/* Default the probe table size if not specified. */
1089	if (fbt_probetab_size == 0)
1090		fbt_probetab_size = FBT_PROBETAB_SIZE;
1091
1092	/* Choose the hash mask for the probe table. */
1093	fbt_probetab_mask = fbt_probetab_size - 1;
1094
1095	/* Allocate memory for the probe table. */
1096	fbt_probetab =
1097	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1098
1099	dtrace_doubletrap_func = fbt_doubletrap;
1100	dtrace_invop_add(fbt_invop);
1101
1102	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1103	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1104		return;
1105
1106	/* Create probes for the kernel and already-loaded modules. */
1107	linker_file_foreach(fbt_linker_file_cb, NULL);
1108}
1109
1110static int
1111fbt_unload()
1112{
1113	int error = 0;
1114
1115	/* De-register the invalid opcode handler. */
1116	dtrace_invop_remove(fbt_invop);
1117
1118	dtrace_doubletrap_func = NULL;
1119
1120	/* De-register this DTrace provider. */
1121	if ((error = dtrace_unregister(fbt_id)) != 0)
1122		return (error);
1123
1124	/* Free the probe table. */
1125	free(fbt_probetab, M_FBT);
1126	fbt_probetab = NULL;
1127	fbt_probetab_mask = 0;
1128
1129	destroy_dev(fbt_cdev);
1130
1131	return (error);
1132}
1133
1134static int
1135fbt_modevent(module_t mod __unused, int type, void *data __unused)
1136{
1137	int error = 0;
1138
1139	switch (type) {
1140	case MOD_LOAD:
1141		break;
1142
1143	case MOD_UNLOAD:
1144		break;
1145
1146	case MOD_SHUTDOWN:
1147		break;
1148
1149	default:
1150		error = EOPNOTSUPP;
1151		break;
1152
1153	}
1154
1155	return (error);
1156}
1157
1158static int
1159fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1160{
1161	return (0);
1162}
1163
/*
 * Run fbt_load()/fbt_unload() at the SI_SUB_DTRACE_PROVIDER stage.
 * NOTE(review): fbt_unload() returns int; whether SYSUNINIT looks at that
 * value is not visible here -- confirm.
 */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/* Module declaration, version, and dependencies on dtrace and opensolaris. */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1171