fbt_powerpc.c revision 248457
1254219Scy/*
2254219Scy * CDDL HEADER START
3254219Scy *
4254219Scy * The contents of this file are subject to the terms of the
5254219Scy * Common Development and Distribution License (the "License").
6254219Scy * You may not use this file except in compliance with the License.
7254219Scy *
8254219Scy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9254219Scy * or http://www.opensolaris.org/os/licensing.
10254219Scy * See the License for the specific language governing permissions
11254219Scy * and limitations under the License.
12254219Scy *
13254219Scy * When distributing Covered Code, include this CDDL HEADER in each
14254219Scy * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15254219Scy * If applicable, add the following below this CDDL HEADER, with the
16254219Scy * fields enclosed by brackets "[]" replaced with your own identifying
17254219Scy * information: Portions Copyright [yyyy] [name of copyright owner]
18254219Scy *
19254219Scy * CDDL HEADER END
20254219Scy *
21254219Scy * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22254219Scy * Portions Copyright 2013 Justin Hibbits jhibbits@freebsd.org
23254219Scy *
24254219Scy * $FreeBSD: head/sys/cddl/dev/fbt/fbt_powerpc.c 248457 2013-03-18 05:30:18Z jhibbits $
25254219Scy *
26254219Scy */
27254219Scy
28254219Scy/*
29254219Scy * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
30254219Scy * Use is subject to license terms.
31254219Scy */
32254219Scy
33254219Scy#include <sys/cdefs.h>
34254219Scy#include <sys/param.h>
35254219Scy#include <sys/systm.h>
36254219Scy#include <sys/conf.h>
37254219Scy#include <sys/cpuvar.h>
38254219Scy#include <sys/fcntl.h>
39254219Scy#include <sys/filio.h>
40254219Scy#include <sys/kdb.h>
41254219Scy#include <sys/kernel.h>
42254219Scy#include <sys/kmem.h>
43254219Scy#include <sys/kthread.h>
44254219Scy#include <sys/limits.h>
45254219Scy#include <sys/linker.h>
46254219Scy#include <sys/lock.h>
47254219Scy#include <sys/malloc.h>
48254219Scy#include <sys/module.h>
49254219Scy#include <sys/mutex.h>
50254219Scy#include <sys/pcpu.h>
51254219Scy#include <sys/poll.h>
52254219Scy#include <sys/proc.h>
53254219Scy#include <sys/selinfo.h>
54254219Scy#include <sys/smp.h>
55254219Scy#include <sys/syscall.h>
56254219Scy#include <sys/sysent.h>
57254219Scy#include <sys/sysproto.h>
58254219Scy#include <sys/uio.h>
59254219Scy#include <sys/unistd.h>
60254219Scy#include <machine/stdarg.h>
61254219Scy
62254219Scy#include <sys/dtrace.h>
63254219Scy#include <sys/dtrace_bsd.h>
64254219Scy
/* Malloc type tag passed to malloc()/free() for fbt_probe_t records and CTF declaration nodes. */
static MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");
66254219Scy
/*
 * 32-bit PowerPC instruction words used when locating and patching probe
 * sites.
 *
 * FBT_PATCHVAL is the word written over a probe site while the probe is
 * enabled.  FBT_MFLR_R0 marks the entry site; FBT_MTLR_R0 followed shortly
 * by FBT_BLR, FBT_BCTR or an unconditional relative branch marks a return
 * site.
 */
#define FBT_PATCHVAL		0x7c810808
#define FBT_MFLR_R0		0x7c0802a6
#define FBT_MTLR_R0		0x7c0803a6
#define FBT_BLR			0x4e800020
#define FBT_BCTR		0x4e800030
#define FBT_BRANCH		0x48000000
/* Displacement field of a branch word; everything outside it must match. */
#define FBT_BR_MASK		0x03fffffc
/*
 * True when `instr` equals FBT_BRANCH in every bit outside FBT_BR_MASK.
 * The argument is parenthesized so that compound expressions such as
 * FBT_IS_JUMP(a | b) are masked as a whole.
 */
#define FBT_IS_JUMP(instr)	(((instr) & ~FBT_BR_MASK) == FBT_BRANCH)
75
76static d_open_t	fbt_open;
77static int	fbt_unload(void);
78static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
79static void	fbt_provide_module(void *, modctl_t *);
80static void	fbt_destroy(void *, dtrace_id_t, void *);
81static void	fbt_enable(void *, dtrace_id_t, void *);
82static void	fbt_disable(void *, dtrace_id_t, void *);
83static void	fbt_load(void *);
84static void	fbt_suspend(void *, dtrace_id_t, void *);
85static void	fbt_resume(void *, dtrace_id_t, void *);
86
/* Probe names published for each instrumented function. */
#define	FBT_ENTRY	"entry"
#define	FBT_RETURN	"return"

/* Number of bucket heads in the patch-point hash table (32k). */
#define	FBT_PROBETAB_SIZE	0x8000

/*
 * Hash a patch-point address to a bucket index: drop the low four bits,
 * then mask with fbt_probetab_mask.
 */
#define	FBT_ADDR2NDX(addr)	\
	(((uintptr_t)(addr) >> 4) & fbt_probetab_mask)
91
92static struct cdevsw fbt_cdevsw = {
93	.d_version	= D_VERSION,
94	.d_open		= fbt_open,
95	.d_name		= "fbt",
96};
97
98static dtrace_pattr_t fbt_attr = {
99{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
100{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
101{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
102{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
103{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
104};
105
106static dtrace_pops_t fbt_pops = {
107	NULL,
108	fbt_provide_module,
109	fbt_enable,
110	fbt_disable,
111	fbt_suspend,
112	fbt_resume,
113	fbt_getargdesc,
114	NULL,
115	NULL,
116	fbt_destroy
117};
118
119typedef struct fbt_probe {
120	struct fbt_probe *fbtp_hashnext;
121	uint32_t	*fbtp_patchpoint;
122	int8_t		fbtp_rval;
123	uint32_t	fbtp_patchval;
124	uint32_t	fbtp_savedval;
125	uintptr_t	fbtp_roffset;
126	dtrace_id_t	fbtp_id;
127	const char	*fbtp_name;
128	modctl_t	*fbtp_ctl;
129	int		fbtp_loadcnt;
130	int		fbtp_primary;
131	int		fbtp_invop_cnt;
132	int		fbtp_symindx;
133	struct fbt_probe *fbtp_next;
134} fbt_probe_t;
135
136static struct cdev		*fbt_cdev;
137static dtrace_provider_id_t	fbt_id;
138static fbt_probe_t		**fbt_probetab;
139static int			fbt_probetab_size;
140static int			fbt_probetab_mask;
141static int			fbt_verbose = 0;
142
143static int
144fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
145{
146	struct trapframe *frame = (struct trapframe *)stack;
147	solaris_cpu_t *cpu = &solaris_cpu[curcpu];
148	fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
149	uintptr_t tmp;
150
151	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
152		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
153			fbt->fbtp_invop_cnt++;
154			if (fbt->fbtp_roffset == 0) {
155				cpu->cpu_dtrace_caller = addr;
156
157				dtrace_probe(fbt->fbtp_id, frame->fixreg[3],
158				    frame->fixreg[4], frame->fixreg[5],
159				    frame->fixreg[6], frame->fixreg[7]);
160
161				cpu->cpu_dtrace_caller = 0;
162			} else {
163
164				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
165				    rval, 0, 0, 0);
166				/*
167				 * The caller doesn't have the fbt item, so
168				 * fixup tail calls here.
169				 */
170				if (fbt->fbtp_rval == DTRACE_INVOP_JUMP) {
171					frame->srr0 = (uintptr_t)fbt->fbtp_patchpoint;
172					tmp = fbt->fbtp_savedval & FBT_BR_MASK;
173					/* Sign extend. */
174					if (tmp & 0x02000000)
175						tmp |= 0xFC000000;
176					frame->srr0 += tmp;
177				}
178				cpu->cpu_dtrace_caller = 0;
179			}
180
181			return (fbt->fbtp_rval);
182		}
183	}
184
185	return (0);
186}
187
188static int
189fbt_provide_module_function(linker_file_t lf, int symindx,
190    linker_symval_t *symval, void *opaque)
191{
192	char *modname = opaque;
193	const char *name = symval->name;
194	fbt_probe_t *fbt, *retfbt;
195	int j;
196	int size;
197	u_int32_t *instr, *limit;
198
199	if (strncmp(name, "dtrace_", 7) == 0 &&
200	    strncmp(name, "dtrace_safe_", 12) != 0) {
201		/*
202		 * Anything beginning with "dtrace_" may be called
203		 * from probe context unless it explicitly indicates
204		 * that it won't be called from probe context by
205		 * using the prefix "dtrace_safe_".
206		 */
207		return (0);
208	}
209
210	if (name[0] == '_' && name[1] == '_')
211		return (0);
212
213	size = symval->size;
214
215	instr = (u_int32_t *) symval->value;
216	limit = (u_int32_t *) symval->value + symval->size;
217
218	for (; instr < limit; instr++)
219		if (*instr == FBT_MFLR_R0)
220			break;
221
222	if (*instr != FBT_MFLR_R0);
223		return (0);
224
225	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
226	fbt->fbtp_name = name;
227	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
228	    name, FBT_ENTRY, 3, fbt);
229	fbt->fbtp_patchpoint = instr;
230	fbt->fbtp_ctl = lf;
231	fbt->fbtp_loadcnt = lf->loadcnt;
232	fbt->fbtp_savedval = *instr;
233	fbt->fbtp_patchval = FBT_PATCHVAL;
234	fbt->fbtp_rval = DTRACE_INVOP_MFLR_R0;
235	fbt->fbtp_symindx = symindx;
236
237	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
238	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
239
240	lf->fbt_nentries++;
241
242	retfbt = NULL;
243again:
244	if (instr >= limit)
245		return (0);
246
247	/*
248	 * We (desperately) want to avoid erroneously instrumenting a
249	 * jump table To determine if we're looking at a true instruction
250	 * sequence or an inline jump table that happens to contain the same
251	 * byte sequences, we resort to some heuristic sleeze:  we treat this
252	 * instruction as being contained within a pointer, and see if that
253	 * pointer points to within the body of the function.  If it does, we
254	 * refuse to instrument it.
255	 */
256	{
257		uint32_t *ptr;
258
259		ptr = *(uint32_t **)instr;
260
261		if (ptr >= (uint32_t *) symval->value && ptr < limit) {
262			instr++;
263			goto again;
264		}
265	}
266
267	if (*instr == FBT_MFLR_R0)
268		return (0);
269
270	if (*instr != FBT_MTLR_R0) {
271		instr++;
272		goto again;
273	}
274
275	instr++;
276
277	for (j = 0; j < 12 && instr < limit; j++, instr++) {
278		if ((*instr == FBT_BCTR) || (*instr == FBT_BLR) |
279		    FBT_IS_JUMP(*instr))
280			break;
281	}
282
283	if (!(*instr == FBT_BCTR || *instr == FBT_BLR || FBT_IS_JUMP(*instr)))
284		goto again;
285
286	/*
287	 * We have a winner!
288	 */
289	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
290	fbt->fbtp_name = name;
291
292	if (retfbt == NULL) {
293		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
294		    name, FBT_RETURN, 3, fbt);
295	} else {
296		retfbt->fbtp_next = fbt;
297		fbt->fbtp_id = retfbt->fbtp_id;
298	}
299
300	retfbt = fbt;
301	fbt->fbtp_patchpoint = instr;
302	fbt->fbtp_ctl = lf;
303	fbt->fbtp_loadcnt = lf->loadcnt;
304	fbt->fbtp_symindx = symindx;
305
306	if (*instr == FBT_BCTR)
307		fbt->fbtp_rval = DTRACE_INVOP_BCTR;
308	else if (*instr == FBT_BLR)
309		fbt->fbtp_rval = DTRACE_INVOP_RET;
310	else
311		fbt->fbtp_rval = DTRACE_INVOP_JUMP;
312
313	fbt->fbtp_savedval = *instr;
314	fbt->fbtp_patchval = FBT_PATCHVAL;
315	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
316	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
317
318	lf->fbt_nentries++;
319
320	instr += size;
321	goto again;
322}
323
324static void
325fbt_provide_module(void *arg, modctl_t *lf)
326{
327	char modname[MAXPATHLEN];
328	int i;
329	size_t len;
330
331	strlcpy(modname, lf->filename, sizeof(modname));
332	len = strlen(modname);
333	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
334		modname[len - 3] = '\0';
335
336	/*
337	 * Employees of dtrace and their families are ineligible.  Void
338	 * where prohibited.
339	 */
340	if (strcmp(modname, "dtrace") == 0)
341		return;
342
343	/*
344	 * The cyclic timer subsystem can be built as a module and DTrace
345	 * depends on that, so it is ineligible too.
346	 */
347	if (strcmp(modname, "cyclic") == 0)
348		return;
349
350	/*
351	 * To register with DTrace, a module must list 'dtrace' as a
352	 * dependency in order for the kernel linker to resolve
353	 * symbols like dtrace_register(). All modules with such a
354	 * dependency are ineligible for FBT tracing.
355	 */
356	for (i = 0; i < lf->ndeps; i++)
357		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
358			return;
359
360	if (lf->fbt_nentries) {
361		/*
362		 * This module has some FBT entries allocated; we're afraid
363		 * to screw with it.
364		 */
365		return;
366	}
367
368	/*
369	 * List the functions in the module and the symbol values.
370	 */
371	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
372}
373
374static void
375fbt_destroy(void *arg, dtrace_id_t id, void *parg)
376{
377	fbt_probe_t *fbt = parg, *next, *hash, *last;
378	modctl_t *ctl;
379	int ndx;
380
381	do {
382		ctl = fbt->fbtp_ctl;
383
384		ctl->fbt_nentries--;
385
386		/*
387		 * Now we need to remove this probe from the fbt_probetab.
388		 */
389		ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
390		last = NULL;
391		hash = fbt_probetab[ndx];
392
393		while (hash != fbt) {
394			ASSERT(hash != NULL);
395			last = hash;
396			hash = hash->fbtp_hashnext;
397		}
398
399		if (last != NULL) {
400			last->fbtp_hashnext = fbt->fbtp_hashnext;
401		} else {
402			fbt_probetab[ndx] = fbt->fbtp_hashnext;
403		}
404
405		next = fbt->fbtp_next;
406		free(fbt, M_FBT);
407
408		fbt = next;
409	} while (fbt != NULL);
410}
411
412static void
413fbt_enable(void *arg, dtrace_id_t id, void *parg)
414{
415	fbt_probe_t *fbt = parg;
416	modctl_t *ctl = fbt->fbtp_ctl;
417
418	ctl->nenabled++;
419
420	/*
421	 * Now check that our modctl has the expected load count.  If it
422	 * doesn't, this module must have been unloaded and reloaded -- and
423	 * we're not going to touch it.
424	 */
425	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
426		if (fbt_verbose) {
427			printf("fbt is failing for probe %s "
428			    "(module %s reloaded)",
429			    fbt->fbtp_name, ctl->filename);
430		}
431
432		return;
433	}
434
435	for (; fbt != NULL; fbt = fbt->fbtp_next) {
436		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
437	}
438}
439
440static void
441fbt_disable(void *arg, dtrace_id_t id, void *parg)
442{
443	fbt_probe_t *fbt = parg;
444	modctl_t *ctl = fbt->fbtp_ctl;
445
446	ASSERT(ctl->nenabled > 0);
447	ctl->nenabled--;
448
449	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
450		return;
451
452	for (; fbt != NULL; fbt = fbt->fbtp_next)
453		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
454}
455
456static void
457fbt_suspend(void *arg, dtrace_id_t id, void *parg)
458{
459	fbt_probe_t *fbt = parg;
460	modctl_t *ctl = fbt->fbtp_ctl;
461
462	ASSERT(ctl->nenabled > 0);
463
464	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
465		return;
466
467	for (; fbt != NULL; fbt = fbt->fbtp_next)
468		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
469}
470
471static void
472fbt_resume(void *arg, dtrace_id_t id, void *parg)
473{
474	fbt_probe_t *fbt = parg;
475	modctl_t *ctl = fbt->fbtp_ctl;
476
477	ASSERT(ctl->nenabled > 0);
478
479	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
480		return;
481
482	for (; fbt != NULL; fbt = fbt->fbtp_next)
483		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
484}
485
486static int
487fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
488{
489	const Elf_Sym *symp = lc->symtab;;
490	const char *name;
491	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
492	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
493	int i;
494	uint32_t *ctfoff;
495	uint32_t objtoff = hp->cth_objtoff;
496	uint32_t funcoff = hp->cth_funcoff;
497	ushort_t info;
498	ushort_t vlen;
499
500	/* Sanity check. */
501	if (hp->cth_magic != CTF_MAGIC) {
502		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
503		return (EINVAL);
504	}
505
506	if (lc->symtab == NULL) {
507		printf("No symbol table in '%s'\n",lf->pathname);
508		return (EINVAL);
509	}
510
511	if ((ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK)) == NULL)
512		return (ENOMEM);
513
514	*lc->ctfoffp = ctfoff;
515
516	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
517		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
518			*ctfoff = 0xffffffff;
519			continue;
520		}
521
522		if (symp->st_name < lc->strcnt)
523			name = lc->strtab + symp->st_name;
524		else
525			name = "(?)";
526
527		switch (ELF_ST_TYPE(symp->st_info)) {
528		case STT_OBJECT:
529			if (objtoff >= hp->cth_funcoff ||
530                            (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
531				*ctfoff = 0xffffffff;
532                                break;
533                        }
534
535                        *ctfoff = objtoff;
536                        objtoff += sizeof (ushort_t);
537			break;
538
539		case STT_FUNC:
540			if (funcoff >= hp->cth_typeoff) {
541				*ctfoff = 0xffffffff;
542				break;
543			}
544
545			*ctfoff = funcoff;
546
547			info = *((const ushort_t *)(ctfdata + funcoff));
548			vlen = CTF_INFO_VLEN(info);
549
550			/*
551			 * If we encounter a zero pad at the end, just skip it.
552			 * Otherwise skip over the function and its return type
553			 * (+2) and the argument list (vlen).
554			 */
555			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
556				funcoff += sizeof (ushort_t); /* skip pad */
557			else
558				funcoff += sizeof (ushort_t) * (vlen + 2);
559			break;
560
561		default:
562			*ctfoff = 0xffffffff;
563			break;
564		}
565	}
566
567	return (0);
568}
569
570static ssize_t
571fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
572    ssize_t *incrementp)
573{
574	ssize_t size, increment;
575
576	if (version > CTF_VERSION_1 &&
577	    tp->ctt_size == CTF_LSIZE_SENT) {
578		size = CTF_TYPE_LSIZE(tp);
579		increment = sizeof (ctf_type_t);
580	} else {
581		size = tp->ctt_size;
582		increment = sizeof (ctf_stype_t);
583	}
584
585	if (sizep)
586		*sizep = size;
587	if (incrementp)
588		*incrementp = increment;
589
590	return (size);
591}
592
593static int
594fbt_typoff_init(linker_ctf_t *lc)
595{
596	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
597	const ctf_type_t *tbuf;
598	const ctf_type_t *tend;
599	const ctf_type_t *tp;
600	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
601	int ctf_typemax = 0;
602	uint32_t *xp;
603	ulong_t pop[CTF_K_MAX + 1] = { 0 };
604
605
606	/* Sanity check. */
607	if (hp->cth_magic != CTF_MAGIC)
608		return (EINVAL);
609
610	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
611	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
612
613	int child = hp->cth_parname != 0;
614
615	/*
616	 * We make two passes through the entire type section.  In this first
617	 * pass, we count the number of each type and the total number of types.
618	 */
619	for (tp = tbuf; tp < tend; ctf_typemax++) {
620		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
621		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
622		ssize_t size, increment;
623
624		size_t vbytes;
625		uint_t n;
626
627		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
628
629		switch (kind) {
630		case CTF_K_INTEGER:
631		case CTF_K_FLOAT:
632			vbytes = sizeof (uint_t);
633			break;
634		case CTF_K_ARRAY:
635			vbytes = sizeof (ctf_array_t);
636			break;
637		case CTF_K_FUNCTION:
638			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
639			break;
640		case CTF_K_STRUCT:
641		case CTF_K_UNION:
642			if (size < CTF_LSTRUCT_THRESH) {
643				ctf_member_t *mp = (ctf_member_t *)
644				    ((uintptr_t)tp + increment);
645
646				vbytes = sizeof (ctf_member_t) * vlen;
647				for (n = vlen; n != 0; n--, mp++)
648					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
649			} else {
650				ctf_lmember_t *lmp = (ctf_lmember_t *)
651				    ((uintptr_t)tp + increment);
652
653				vbytes = sizeof (ctf_lmember_t) * vlen;
654				for (n = vlen; n != 0; n--, lmp++)
655					child |=
656					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
657			}
658			break;
659		case CTF_K_ENUM:
660			vbytes = sizeof (ctf_enum_t) * vlen;
661			break;
662		case CTF_K_FORWARD:
663			/*
664			 * For forward declarations, ctt_type is the CTF_K_*
665			 * kind for the tag, so bump that population count too.
666			 * If ctt_type is unknown, treat the tag as a struct.
667			 */
668			if (tp->ctt_type == CTF_K_UNKNOWN ||
669			    tp->ctt_type >= CTF_K_MAX)
670				pop[CTF_K_STRUCT]++;
671			else
672				pop[tp->ctt_type]++;
673			/*FALLTHRU*/
674		case CTF_K_UNKNOWN:
675			vbytes = 0;
676			break;
677		case CTF_K_POINTER:
678		case CTF_K_TYPEDEF:
679		case CTF_K_VOLATILE:
680		case CTF_K_CONST:
681		case CTF_K_RESTRICT:
682			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
683			vbytes = 0;
684			break;
685		default:
686			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
687			return (EIO);
688		}
689		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
690		pop[kind]++;
691	}
692
693	*lc->typlenp = ctf_typemax;
694
695	if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER, M_ZERO | M_WAITOK)) == NULL)
696		return (ENOMEM);
697
698	*lc->typoffp = xp;
699
700	/* type id 0 is used as a sentinel value */
701	*xp++ = 0;
702
703	/*
704	 * In the second pass, fill in the type offset.
705	 */
706	for (tp = tbuf; tp < tend; xp++) {
707		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
708		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
709		ssize_t size, increment;
710
711		size_t vbytes;
712		uint_t n;
713
714		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
715
716		switch (kind) {
717		case CTF_K_INTEGER:
718		case CTF_K_FLOAT:
719			vbytes = sizeof (uint_t);
720			break;
721		case CTF_K_ARRAY:
722			vbytes = sizeof (ctf_array_t);
723			break;
724		case CTF_K_FUNCTION:
725			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
726			break;
727		case CTF_K_STRUCT:
728		case CTF_K_UNION:
729			if (size < CTF_LSTRUCT_THRESH) {
730				ctf_member_t *mp = (ctf_member_t *)
731				    ((uintptr_t)tp + increment);
732
733				vbytes = sizeof (ctf_member_t) * vlen;
734				for (n = vlen; n != 0; n--, mp++)
735					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
736			} else {
737				ctf_lmember_t *lmp = (ctf_lmember_t *)
738				    ((uintptr_t)tp + increment);
739
740				vbytes = sizeof (ctf_lmember_t) * vlen;
741				for (n = vlen; n != 0; n--, lmp++)
742					child |=
743					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
744			}
745			break;
746		case CTF_K_ENUM:
747			vbytes = sizeof (ctf_enum_t) * vlen;
748			break;
749		case CTF_K_FORWARD:
750		case CTF_K_UNKNOWN:
751			vbytes = 0;
752			break;
753		case CTF_K_POINTER:
754		case CTF_K_TYPEDEF:
755		case CTF_K_VOLATILE:
756		case CTF_K_CONST:
757		case CTF_K_RESTRICT:
758			vbytes = 0;
759			break;
760		default:
761			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
762			return (EIO);
763		}
764		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
765		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
766	}
767
768	return (0);
769}
770
771/*
772 * CTF Declaration Stack
773 *
774 * In order to implement ctf_type_name(), we must convert a type graph back
775 * into a C type declaration.  Unfortunately, a type graph represents a storage
776 * class ordering of the type whereas a type declaration must obey the C rules
777 * for operator precedence, and the two orderings are frequently in conflict.
778 * For example, consider these CTF type graphs and their C declarations:
779 *
780 * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
781 * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
782 *
783 * In each case, parentheses are used to raise operator * to higher lexical
784 * precedence, so the string form of the C declaration cannot be constructed by
785 * walking the type graph links and forming the string from left to right.
786 *
787 * The functions in this file build a set of stacks from the type graph nodes
788 * corresponding to the C operator precedence levels in the appropriate order.
789 * The code in ctf_type_name() can then iterate over the levels and nodes in
790 * lexical precedence order and construct the final C declaration string.
791 */
/*
 * Link pair for a doubly-linked list.  Embedded as the first member of a
 * list element, or used on its own as the list header.
 */
struct ctf_list {
	struct ctf_list *l_prev;	/* element: previous element; header: tail */
	struct ctf_list *l_next;	/* element: next element; header: head */
};

typedef struct ctf_list ctf_list_t;

/* Follow a link from an element (or header), yielding an untyped pointer. */
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
799
/*
 * Precedence levels used to order declaration nodes; CTF_PREC_MAX is the
 * number of levels and sizes the per-level arrays.
 */
typedef enum {
	CTF_PREC_BASE = 0,
	CTF_PREC_POINTER = 1,
	CTF_PREC_ARRAY = 2,
	CTF_PREC_FUNCTION = 3,
	CTF_PREC_MAX = 4
} ctf_decl_prec_t;
807
808typedef struct ctf_decl_node {
809	ctf_list_t cd_list;			/* linked list pointers */
810	ctf_id_t cd_type;			/* type identifier */
811	uint_t cd_kind;				/* type kind */
812	uint_t cd_n;				/* type dimension if array */
813} ctf_decl_node_t;
814
815typedef struct ctf_decl {
816	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
817	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
818	ctf_decl_prec_t cd_qualp;		/* qualifier precision */
819	ctf_decl_prec_t cd_ordp;		/* ordered precision */
820	char *cd_buf;				/* buffer for output */
821	char *cd_ptr;				/* buffer location */
822	char *cd_end;				/* buffer limit */
823	size_t cd_len;				/* buffer space required */
824	int cd_err;				/* saved error value */
825} ctf_decl_t;
826
827/*
828 * Simple doubly-linked list append routine.  This implementation assumes that
829 * each list element contains an embedded ctf_list_t as the first member.
830 * An additional ctf_list_t is used to store the head (l_next) and tail
831 * (l_prev) pointers.  The current head and tail list elements have their
832 * previous and next pointers set to NULL, respectively.
833 */
834static void
835ctf_list_append(ctf_list_t *lp, void *new)
836{
837	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
838	ctf_list_t *q = new;		/* q = new list element */
839
840	lp->l_prev = q;
841	q->l_prev = p;
842	q->l_next = NULL;
843
844	if (p != NULL)
845		p->l_next = q;
846	else
847		lp->l_next = q;
848}
849
850/*
851 * Prepend the specified existing element to the given ctf_list_t.  The
852 * existing pointer should be pointing at a struct with embedded ctf_list_t.
853 */
854static void
855ctf_list_prepend(ctf_list_t *lp, void *new)
856{
857	ctf_list_t *p = new;		/* p = new list element */
858	ctf_list_t *q = lp->l_next;	/* q = head list element */
859
860	lp->l_next = p;
861	p->l_prev = NULL;
862	p->l_next = q;
863
864	if (q != NULL)
865		q->l_prev = p;
866	else
867		lp->l_prev = p;
868}
869
870static void
871ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
872{
873	int i;
874
875	bzero(cd, sizeof (ctf_decl_t));
876
877	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
878		cd->cd_order[i] = CTF_PREC_BASE - 1;
879
880	cd->cd_qualp = CTF_PREC_BASE;
881	cd->cd_ordp = CTF_PREC_BASE;
882
883	cd->cd_buf = buf;
884	cd->cd_ptr = buf;
885	cd->cd_end = buf + len;
886}
887
888static void
889ctf_decl_fini(ctf_decl_t *cd)
890{
891	ctf_decl_node_t *cdp, *ndp;
892	int i;
893
894	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
895		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
896		    cdp != NULL; cdp = ndp) {
897			ndp = ctf_list_next(cdp);
898			free(cdp, M_FBT);
899		}
900	}
901}
902
903static const ctf_type_t *
904ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
905{
906	const ctf_type_t *tp;
907	uint32_t offset;
908	uint32_t *typoff = *lc->typoffp;
909
910	if (type >= *lc->typlenp) {
911		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
912		return(NULL);
913	}
914
915	/* Check if the type isn't cross-referenced. */
916	if ((offset = typoff[type]) == 0) {
917		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
918		return(NULL);
919	}
920
921	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
922
923	return (tp);
924}
925
926static void
927fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
928{
929	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
930	const ctf_type_t *tp;
931	const ctf_array_t *ap;
932	ssize_t increment;
933
934	bzero(arp, sizeof(*arp));
935
936	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
937		return;
938
939	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
940		return;
941
942	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
943
944	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
945	arp->ctr_contents = ap->cta_contents;
946	arp->ctr_index = ap->cta_index;
947	arp->ctr_nelems = ap->cta_nelems;
948}
949
950static const char *
951ctf_strptr(linker_ctf_t *lc, int name)
952{
953	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
954	const char *strp = "";
955
956	if (name < 0 || name >= hp->cth_strlen)
957		return(strp);
958
959	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
960
961	return (strp);
962}
963
964static void
965ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
966{
967	ctf_decl_node_t *cdp;
968	ctf_decl_prec_t prec;
969	uint_t kind, n = 1;
970	int is_qual = 0;
971
972	const ctf_type_t *tp;
973	ctf_arinfo_t ar;
974
975	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
976		cd->cd_err = ENOENT;
977		return;
978	}
979
980	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
981	case CTF_K_ARRAY:
982		fbt_array_info(lc, type, &ar);
983		ctf_decl_push(cd, lc, ar.ctr_contents);
984		n = ar.ctr_nelems;
985		prec = CTF_PREC_ARRAY;
986		break;
987
988	case CTF_K_TYPEDEF:
989		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
990			ctf_decl_push(cd, lc, tp->ctt_type);
991			return;
992		}
993		prec = CTF_PREC_BASE;
994		break;
995
996	case CTF_K_FUNCTION:
997		ctf_decl_push(cd, lc, tp->ctt_type);
998		prec = CTF_PREC_FUNCTION;
999		break;
1000
1001	case CTF_K_POINTER:
1002		ctf_decl_push(cd, lc, tp->ctt_type);
1003		prec = CTF_PREC_POINTER;
1004		break;
1005
1006	case CTF_K_VOLATILE:
1007	case CTF_K_CONST:
1008	case CTF_K_RESTRICT:
1009		ctf_decl_push(cd, lc, tp->ctt_type);
1010		prec = cd->cd_qualp;
1011		is_qual++;
1012		break;
1013
1014	default:
1015		prec = CTF_PREC_BASE;
1016	}
1017
1018	if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) {
1019		cd->cd_err = EAGAIN;
1020		return;
1021	}
1022
1023	cdp->cd_type = type;
1024	cdp->cd_kind = kind;
1025	cdp->cd_n = n;
1026
1027	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
1028		cd->cd_order[prec] = cd->cd_ordp++;
1029
1030	/*
1031	 * Reset cd_qualp to the highest precedence level that we've seen so
1032	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
1033	 */
1034	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
1035		cd->cd_qualp = prec;
1036
1037	/*
1038	 * C array declarators are ordered inside out so prepend them.  Also by
1039	 * convention qualifiers of base types precede the type specifier (e.g.
1040	 * const int vs. int const) even though the two forms are equivalent.
1041	 */
1042	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
1043		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
1044	else
1045		ctf_list_append(&cd->cd_nodes[prec], cdp);
1046}
1047
1048static void
1049ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
1050{
1051	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
1052	va_list ap;
1053	size_t n;
1054
1055	va_start(ap, format);
1056	n = vsnprintf(cd->cd_ptr, len, format, ap);
1057	va_end(ap);
1058
1059	cd->cd_ptr += MIN(n, len);
1060	cd->cd_len += n;
1061}
1062
1063static ssize_t
1064fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
1065{
1066	ctf_decl_t cd;
1067	ctf_decl_node_t *cdp;
1068	ctf_decl_prec_t prec, lp, rp;
1069	int ptr, arr;
1070	uint_t k;
1071
1072	if (lc == NULL && type == CTF_ERR)
1073		return (-1); /* simplify caller code by permitting CTF_ERR */
1074
1075	ctf_decl_init(&cd, buf, len);
1076	ctf_decl_push(&cd, lc, type);
1077
1078	if (cd.cd_err != 0) {
1079		ctf_decl_fini(&cd);
1080		return (-1);
1081	}
1082
1083	/*
1084	 * If the type graph's order conflicts with lexical precedence order
1085	 * for pointers or arrays, then we need to surround the declarations at
1086	 * the corresponding lexical precedence with parentheses.  This can
1087	 * result in either a parenthesized pointer (*) as in int (*)() or
1088	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
1089	 */
1090	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
1091	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
1092
1093	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
1094	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
1095
1096	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
1097
1098	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
1099		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
1100		    cdp != NULL; cdp = ctf_list_next(cdp)) {
1101
1102			const ctf_type_t *tp =
1103			    ctf_lookup_by_id(lc, cdp->cd_type);
1104			const char *name = ctf_strptr(lc, tp->ctt_name);
1105
1106			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
1107				ctf_decl_sprintf(&cd, " ");
1108
1109			if (lp == prec) {
1110				ctf_decl_sprintf(&cd, "(");
1111				lp = -1;
1112			}
1113
1114			switch (cdp->cd_kind) {
1115			case CTF_K_INTEGER:
1116			case CTF_K_FLOAT:
1117			case CTF_K_TYPEDEF:
1118				ctf_decl_sprintf(&cd, "%s", name);
1119				break;
1120			case CTF_K_POINTER:
1121				ctf_decl_sprintf(&cd, "*");
1122				break;
1123			case CTF_K_ARRAY:
1124				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
1125				break;
1126			case CTF_K_FUNCTION:
1127				ctf_decl_sprintf(&cd, "()");
1128				break;
1129			case CTF_K_STRUCT:
1130			case CTF_K_FORWARD:
1131				ctf_decl_sprintf(&cd, "struct %s", name);
1132				break;
1133			case CTF_K_UNION:
1134				ctf_decl_sprintf(&cd, "union %s", name);
1135				break;
1136			case CTF_K_ENUM:
1137				ctf_decl_sprintf(&cd, "enum %s", name);
1138				break;
1139			case CTF_K_VOLATILE:
1140				ctf_decl_sprintf(&cd, "volatile");
1141				break;
1142			case CTF_K_CONST:
1143				ctf_decl_sprintf(&cd, "const");
1144				break;
1145			case CTF_K_RESTRICT:
1146				ctf_decl_sprintf(&cd, "restrict");
1147				break;
1148			}
1149
1150			k = cdp->cd_kind;
1151		}
1152
1153		if (rp == prec)
1154			ctf_decl_sprintf(&cd, ")");
1155	}
1156
1157	ctf_decl_fini(&cd);
1158	return (cd.cd_len);
1159}
1160
1161static void
1162fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1163{
1164	const ushort_t *dp;
1165	fbt_probe_t *fbt = parg;
1166	linker_ctf_t lc;
1167	modctl_t *ctl = fbt->fbtp_ctl;
1168	int ndx = desc->dtargd_ndx;
1169	int symindx = fbt->fbtp_symindx;
1170	uint32_t *ctfoff;
1171	uint32_t offset;
1172	ushort_t info, kind, n;
1173
1174	desc->dtargd_ndx = DTRACE_ARGNONE;
1175
1176	/* Get a pointer to the CTF data and it's length. */
1177	if (linker_ctf_get(ctl, &lc) != 0)
1178		/* No CTF data? Something wrong? *shrug* */
1179		return;
1180
1181	/* Check if this module hasn't been initialised yet. */
1182	if (*lc.ctfoffp == NULL) {
1183		/*
1184		 * Initialise the CTF object and function symindx to
1185		 * byte offset array.
1186		 */
1187		if (fbt_ctfoff_init(ctl, &lc) != 0)
1188			return;
1189
1190		/* Initialise the CTF type to byte offset array. */
1191		if (fbt_typoff_init(&lc) != 0)
1192			return;
1193	}
1194
1195	ctfoff = *lc.ctfoffp;
1196
1197	if (ctfoff == NULL || *lc.typoffp == NULL)
1198		return;
1199
1200	/* Check if the symbol index is out of range. */
1201	if (symindx >= lc.nsym)
1202		return;
1203
1204	/* Check if the symbol isn't cross-referenced. */
1205	if ((offset = ctfoff[symindx]) == 0xffffffff)
1206		return;
1207
1208	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1209
1210	info = *dp++;
1211	kind = CTF_INFO_KIND(info);
1212	n = CTF_INFO_VLEN(info);
1213
1214	if (kind == CTF_K_UNKNOWN && n == 0) {
1215		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1216		return;
1217	}
1218
1219	if (kind != CTF_K_FUNCTION) {
1220		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1221		return;
1222	}
1223
1224	/* Check if the requested argument doesn't exist. */
1225	if (ndx >= n)
1226		return;
1227
1228	/* Skip the return type and arguments up to the one requested. */
1229	dp += ndx + 1;
1230
1231	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1232		desc->dtargd_ndx = ndx;
1233
1234	return;
1235}
1236
1237static void
1238fbt_load(void *dummy)
1239{
1240	/* Create the /dev/dtrace/fbt entry. */
1241	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1242	    "dtrace/fbt");
1243
1244	/* Default the probe table size if not specified. */
1245	if (fbt_probetab_size == 0)
1246		fbt_probetab_size = FBT_PROBETAB_SIZE;
1247
1248	/* Choose the hash mask for the probe table. */
1249	fbt_probetab_mask = fbt_probetab_size - 1;
1250
1251	/* Allocate memory for the probe table. */
1252	fbt_probetab =
1253	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1254
1255	dtrace_invop_add(fbt_invop);
1256
1257	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1258	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1259		return;
1260}
1261
1262
1263static int
1264fbt_unload()
1265{
1266	int error = 0;
1267
1268	/* De-register the invalid opcode handler. */
1269	dtrace_invop_remove(fbt_invop);
1270
1271	/* De-register this DTrace provider. */
1272	if ((error = dtrace_unregister(fbt_id)) != 0)
1273		return (error);
1274
1275	/* Free the probe table. */
1276	free(fbt_probetab, M_FBT);
1277	fbt_probetab = NULL;
1278	fbt_probetab_mask = 0;
1279
1280	destroy_dev(fbt_cdev);
1281
1282	return (error);
1283}
1284
1285static int
1286fbt_modevent(module_t mod __unused, int type, void *data __unused)
1287{
1288	int error = 0;
1289
1290	switch (type) {
1291	case MOD_LOAD:
1292		break;
1293
1294	case MOD_UNLOAD:
1295		break;
1296
1297	case MOD_SHUTDOWN:
1298		break;
1299
1300	default:
1301		error = EOPNOTSUPP;
1302		break;
1303
1304	}
1305
1306	return (error);
1307}
1308
1309static int
1310fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1311{
1312	return (0);
1313}
1314
1315SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
1316SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);
1317
1318DEV_MODULE(fbt, fbt_modevent, NULL);
1319MODULE_VERSION(fbt, 1);
1320MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
1321MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1322