1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 *
21 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22 *
23 * $FreeBSD: head/sys/cddl/dev/fbt/fbt.c 268869 2014-07-19 02:27:31Z markj $
24 *
25 */
26
27/*
28 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29 * Use is subject to license terms.
30 */
31
32#include <sys/cdefs.h>
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/conf.h>
36#include <sys/cpuvar.h>
37#include <sys/fcntl.h>
38#include <sys/filio.h>
39#include <sys/kdb.h>
40#include <sys/kernel.h>
41#include <sys/kmem.h>
42#include <sys/kthread.h>
43#include <sys/limits.h>
44#include <sys/linker.h>
45#include <sys/lock.h>
46#include <sys/malloc.h>
47#include <sys/module.h>
48#include <sys/mutex.h>
49#include <sys/pcpu.h>
50#include <sys/poll.h>
51#include <sys/proc.h>
52#include <sys/selinfo.h>
53#include <sys/smp.h>
54#include <sys/syscall.h>
55#include <sys/sysent.h>
56#include <sys/sysproto.h>
57#include <sys/uio.h>
58#include <sys/unistd.h>
59#include <machine/stdarg.h>
60
61#include <sys/dtrace.h>
62#include <sys/dtrace_bsd.h>
63
64static MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");
65
66#define	FBT_PUSHL_EBP		0x55
67#define	FBT_MOVL_ESP_EBP0_V0	0x8b
68#define	FBT_MOVL_ESP_EBP1_V0	0xec
69#define	FBT_MOVL_ESP_EBP0_V1	0x89
70#define	FBT_MOVL_ESP_EBP1_V1	0xe5
71#define	FBT_REX_RSP_RBP		0x48
72
73#define	FBT_POPL_EBP		0x5d
74#define	FBT_RET			0xc3
75#define	FBT_RET_IMM16		0xc2
76#define	FBT_LEAVE		0xc9
77
78#ifdef __amd64__
79#define	FBT_PATCHVAL		0xcc
80#else
81#define	FBT_PATCHVAL		0xf0
82#endif
83
84static d_open_t	fbt_open;
85static int	fbt_unload(void);
86static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
87static void	fbt_provide_module(void *, modctl_t *);
88static void	fbt_destroy(void *, dtrace_id_t, void *);
89static void	fbt_enable(void *, dtrace_id_t, void *);
90static void	fbt_disable(void *, dtrace_id_t, void *);
91static void	fbt_load(void *);
92static void	fbt_suspend(void *, dtrace_id_t, void *);
93static void	fbt_resume(void *, dtrace_id_t, void *);
94
95#define	FBT_ENTRY	"entry"
96#define	FBT_RETURN	"return"
97#define	FBT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
98#define	FBT_PROBETAB_SIZE	0x8000		/* 32k entries -- 128K (ILP32) or 256K (LP64) of pointers */
99
100static struct cdevsw fbt_cdevsw = {
101	.d_version	= D_VERSION,
102	.d_open		= fbt_open,
103	.d_name		= "fbt",
104};
105
106static dtrace_pattr_t fbt_attr = {
107{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
108{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
109{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
110{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
111{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
112};
113
114static dtrace_pops_t fbt_pops = {
115	NULL,
116	fbt_provide_module,
117	fbt_enable,
118	fbt_disable,
119	fbt_suspend,
120	fbt_resume,
121	fbt_getargdesc,
122	NULL,
123	NULL,
124	fbt_destroy
125};
126
127typedef struct fbt_probe {
128	struct fbt_probe *fbtp_hashnext;
129	uint8_t		*fbtp_patchpoint;
130	int8_t		fbtp_rval;
131	uint8_t		fbtp_patchval;
132	uint8_t		fbtp_savedval;
133	uintptr_t	fbtp_roffset;
134	dtrace_id_t	fbtp_id;
135	const char	*fbtp_name;
136	modctl_t	*fbtp_ctl;
137	int		fbtp_loadcnt;
138	int		fbtp_primary;
139	int		fbtp_invop_cnt;
140	int		fbtp_symindx;
141	struct fbt_probe *fbtp_next;
142} fbt_probe_t;
143
144static struct cdev		*fbt_cdev;
145static dtrace_provider_id_t	fbt_id;
146static fbt_probe_t		**fbt_probetab;
147static int			fbt_probetab_size;
148static int			fbt_probetab_mask;
149static int			fbt_verbose = 0;
150
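/*
 * Installed as dtrace_doubletrap_func: if the kernel double-faults, restore
 * the original first byte of every patched function so that subsequent trap
 * handling does not trip over FBT's breakpoints again.
 */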
151static void
152fbt_doubletrap(void)
153{
154	fbt_probe_t *fbt;
155	int i;
156
157	for (i = 0; i < fbt_probetab_size; i++) {
158		fbt = fbt_probetab[i];
159
160		for (; fbt != NULL; fbt = fbt->fbtp_next)
161			*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
162	}
163}
164
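/*
 * Invalid-opcode handler.  If the faulting address belongs to one of our
 * patchpoints, fire the corresponding entry or return probe and return the
 * DTRACE_INVOP_* code identifying the original instruction so that the trap
 * handler can emulate it; return 0 if the trap was not ours.
 */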
165static int
166fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
167{
168	solaris_cpu_t *cpu = &solaris_cpu[curcpu];
169	uintptr_t stack0, stack1, stack2, stack3, stack4;
170	fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
171
172	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
173		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
174			fbt->fbtp_invop_cnt++;
175			if (fbt->fbtp_roffset == 0) {
176				int i = 0;
177				/*
178				 * When accessing the arguments on the stack,
179				 * we must protect against accessing beyond
180				 * the stack.  We can safely set NOFAULT here
181				 * -- we know that interrupts are already
182				 * disabled.
183				 */
184				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
185				cpu->cpu_dtrace_caller = stack[i++];
186				stack0 = stack[i++];
187				stack1 = stack[i++];
188				stack2 = stack[i++];
189				stack3 = stack[i++];
190				stack4 = stack[i++];
191				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
192				    CPU_DTRACE_BADADDR);
193
194				dtrace_probe(fbt->fbtp_id, stack0, stack1,
195				    stack2, stack3, stack4);
196
197				cpu->cpu_dtrace_caller = 0;
198			} else {
199#ifdef __amd64__
200				/*
201				 * On amd64, we instrument the ret, not the
202				 * leave.  We therefore need to set the caller
203				 * to assure that the top frame of a stack()
204				 * action is correct.
205				 */
206				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
207				cpu->cpu_dtrace_caller = stack[0];
208				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
209				    CPU_DTRACE_BADADDR);
210#endif
211
212				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
213				    rval, 0, 0, 0);
214				cpu->cpu_dtrace_caller = 0;
215			}
216
217			return (fbt->fbtp_rval);
218		}
219	}
220
221	return (0);
222}
223
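/*
 * Callback for linker_file_function_listall(): examine one kernel function
 * and, if it begins with the standard frame-pointer prologue, create an
 * entry probe at its first instruction and return probes at each epilogue
 * found while walking the instruction stream.
 */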
224static int
225fbt_provide_module_function(linker_file_t lf, int symindx,
226    linker_symval_t *symval, void *opaque)
227{
228	char *modname = opaque;
229	const char *name = symval->name;
230	fbt_probe_t *fbt, *retfbt;
231	int j;
232	int size;
233	u_int8_t *instr, *limit;
234
235	if ((strncmp(name, "dtrace_", 7) == 0 &&
236	    strncmp(name, "dtrace_safe_", 12) != 0) ||
237	    strcmp(name, "trap_check") == 0) {
238		/*
239		 * Anything beginning with "dtrace_" may be called
240		 * from probe context unless it explicitly indicates
241		 * that it won't be called from probe context by
242		 * using the prefix "dtrace_safe_".
243		 *
244		 * Additionally, we avoid instrumenting trap_check() to avoid
245		 * the possibility of generating a fault in probe context before
246		 * DTrace's fault handler is called.
247		 */
248		return (0);
249	}
250
251	if (name[0] == '_' && name[1] == '_')
252		return (0);
253
254	size = symval->size;
255
256	instr = (u_int8_t *) symval->value;
257	limit = (u_int8_t *) symval->value + symval->size;
258
259#ifdef __amd64__
260	while (instr < limit) {
261		if (*instr == FBT_PUSHL_EBP)
262			break;
263
264		if ((size = dtrace_instr_size(instr)) <= 0)
265			break;
266
267		instr += size;
268	}
269
270	if (instr >= limit || *instr != FBT_PUSHL_EBP) {
271		/*
272		 * We either don't save the frame pointer in this
273		 * function, or we ran into some disassembly
274		 * screw-up.  Either way, we bail.
275		 */
276		return (0);
277	}
278#else
279	if (instr[0] != FBT_PUSHL_EBP)
280		return (0);
281
282	if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 &&
283	    instr[2] == FBT_MOVL_ESP_EBP1_V0) &&
284	    !(instr[1] == FBT_MOVL_ESP_EBP0_V1 &&
285	    instr[2] == FBT_MOVL_ESP_EBP1_V1))
286		return (0);
287#endif
288
289	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
290	fbt->fbtp_name = name;
291	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
292	    name, FBT_ENTRY, 3, fbt);
293	fbt->fbtp_patchpoint = instr;
294	fbt->fbtp_ctl = lf;
295	fbt->fbtp_loadcnt = lf->loadcnt;
296	fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP;
297	fbt->fbtp_savedval = *instr;
298	fbt->fbtp_patchval = FBT_PATCHVAL;
299	fbt->fbtp_symindx = symindx;
300
301	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
302	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
303
304	lf->fbt_nentries++;
305
306	retfbt = NULL;
307again:
308	if (instr >= limit)
309		return (0);
310
311	/*
312	 * If this disassembly fails, then we've likely walked off into
313	 * a jump table or some other unsuitable area.  Bail out of the
314	 * disassembly now.
315	 */
316	if ((size = dtrace_instr_size(instr)) <= 0)
317		return (0);
318
319#ifdef __amd64__
320	/*
321	 * We only instrument "ret" on amd64 -- we don't yet instrument
322	 * ret imm16, largely because the compiler doesn't seem to
323	 * (yet) emit them in the kernel...
324	 */
325	if (*instr != FBT_RET) {
326		instr += size;
327		goto again;
328	}
329#else
330	if (!(size == 1 &&
331	    (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) &&
332	    (*(instr + 1) == FBT_RET ||
333	    *(instr + 1) == FBT_RET_IMM16))) {
334		instr += size;
335		goto again;
336	}
337#endif
338
339	/*
340	 * We (desperately) want to avoid erroneously instrumenting a
341	 * jump table, especially given that our markers are pretty
342	 * short:  two bytes on x86, and just one byte on amd64.  To
343	 * determine if we're looking at a true instruction sequence
344	 * or an inline jump table that happens to contain the same
345	 * byte sequences, we resort to some heuristic sleaze:  we
346	 * treat this instruction as being contained within a pointer,
347	 * and see if that pointer points to within the body of the
348	 * function.  If it does, we refuse to instrument it.
349	 */
350	for (j = 0; j < sizeof (uintptr_t); j++) {
351		caddr_t check = (caddr_t) instr - j;
352		uint8_t *ptr;
353
354		if (check < symval->value)
355			break;
356
357		if (check + sizeof (caddr_t) > (caddr_t)limit)
358			continue;
359
360		ptr = *(uint8_t **)check;
361
362		if (ptr >= (uint8_t *) symval->value && ptr < limit) {
363			instr += size;
364			goto again;
365		}
366	}
367
368	/*
369	 * We have a winner!
370	 */
371	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
372	fbt->fbtp_name = name;
373
374	if (retfbt == NULL) {
375		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
376		    name, FBT_RETURN, 3, fbt);
377	} else {
378		retfbt->fbtp_next = fbt;
379		fbt->fbtp_id = retfbt->fbtp_id;
380	}
381
382	retfbt = fbt;
383	fbt->fbtp_patchpoint = instr;
384	fbt->fbtp_ctl = lf;
385	fbt->fbtp_loadcnt = lf->loadcnt;
386	fbt->fbtp_symindx = symindx;
387
388#ifndef __amd64__
389	if (*instr == FBT_POPL_EBP) {
390		fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
391	} else {
392		ASSERT(*instr == FBT_LEAVE);
393		fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
394	}
395	fbt->fbtp_roffset =
396	    (uintptr_t)(instr - (uint8_t *) symval->value) + 1;
397
398#else
399	ASSERT(*instr == FBT_RET);
400	fbt->fbtp_rval = DTRACE_INVOP_RET;
401	fbt->fbtp_roffset =
402	    (uintptr_t)(instr - (uint8_t *) symval->value);
403#endif
404
405	fbt->fbtp_savedval = *instr;
406	fbt->fbtp_patchval = FBT_PATCHVAL;
407	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
408	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
409
410	lf->fbt_nentries++;
411
412	instr += size;
413	goto again;
414}
415
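/*
 * Create entry and return probes for every eligible function in the given
 * kernel module, skipping DTrace itself and any module that depends on it.
 */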
416static void
417fbt_provide_module(void *arg, modctl_t *lf)
418{
419	char modname[MAXPATHLEN];
420	int i;
421	size_t len;
422
423	strlcpy(modname, lf->filename, sizeof(modname));
424	len = strlen(modname);
425	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
426		modname[len - 3] = '\0';
427
428	/*
429	 * Employees of dtrace and their families are ineligible.  Void
430	 * where prohibited.
431	 */
432	if (strcmp(modname, "dtrace") == 0)
433		return;
434
435	/*
436	 * The cyclic timer subsystem can be built as a module and DTrace
437	 * depends on that, so it is ineligible too.
438	 */
439	if (strcmp(modname, "cyclic") == 0)
440		return;
441
442	/*
443	 * To register with DTrace, a module must list 'dtrace' as a
444	 * dependency in order for the kernel linker to resolve
445	 * symbols like dtrace_register(). All modules with such a
446	 * dependency are ineligible for FBT tracing.
447	 */
448	for (i = 0; i < lf->ndeps; i++)
449		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
450			return;
451
452	if (lf->fbt_nentries) {
453		/*
454		 * This module has some FBT entries allocated; we're afraid
455		 * to screw with it.
456		 */
457		return;
458	}
459
460	/*
461	 * List the functions in the module and the symbol values.
462	 */
463	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
464}
465
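/*
 * Destroy the chain of fbt_probe_t structures backing a single probe ID,
 * unhooking each one from the probe hash table before freeing it.
 */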
466static void
467fbt_destroy(void *arg, dtrace_id_t id, void *parg)
468{
469	fbt_probe_t *fbt = parg, *next, *hash, *last;
470	modctl_t *ctl;
471	int ndx;
472
473	do {
474		ctl = fbt->fbtp_ctl;
475
476		ctl->fbt_nentries--;
477
478		/*
479		 * Now we need to remove this probe from the fbt_probetab.
480		 */
481		ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
482		last = NULL;
483		hash = fbt_probetab[ndx];
484
485		while (hash != fbt) {
486			ASSERT(hash != NULL);
487			last = hash;
488			hash = hash->fbtp_hashnext;
489		}
490
491		if (last != NULL) {
492			last->fbtp_hashnext = fbt->fbtp_hashnext;
493		} else {
494			fbt_probetab[ndx] = fbt->fbtp_hashnext;
495		}
496
497		next = fbt->fbtp_next;
498		free(fbt, M_FBT);
499
500		fbt = next;
501	} while (fbt != NULL);
502}
503
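/*
 * Arm the probe: overwrite the saved byte at every patchpoint in the chain
 * with the breakpoint value, unless the owning module has been reloaded
 * since the probe was created.
 */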
504static void
505fbt_enable(void *arg, dtrace_id_t id, void *parg)
506{
507	fbt_probe_t *fbt = parg;
508	modctl_t *ctl = fbt->fbtp_ctl;
509
510	ctl->nenabled++;
511
512	/*
513	 * Now check that our modctl has the expected load count.  If it
514	 * doesn't, this module must have been unloaded and reloaded -- and
515	 * we're not going to touch it.
516	 */
517	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
518		if (fbt_verbose) {
519			printf("fbt is failing for probe %s "
520			    "(module %s reloaded)\n",
521			    fbt->fbtp_name, ctl->filename);
522		}
523
524		return;
525	}
526
527	for (; fbt != NULL; fbt = fbt->fbtp_next) {
528		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
529	}
530}
531
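/*
 * Disarm the probe by restoring the saved instruction byte at every
 * patchpoint, subject to the same module reload check as fbt_enable().
 */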
532static void
533fbt_disable(void *arg, dtrace_id_t id, void *parg)
534{
535	fbt_probe_t *fbt = parg;
536	modctl_t *ctl = fbt->fbtp_ctl;
537
538	ASSERT(ctl->nenabled > 0);
539	ctl->nenabled--;
540
541	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
542		return;
543
544	for (; fbt != NULL; fbt = fbt->fbtp_next)
545		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
546}
547
548static void
549fbt_suspend(void *arg, dtrace_id_t id, void *parg)
550{
551	fbt_probe_t *fbt = parg;
552	modctl_t *ctl = fbt->fbtp_ctl;
553
554	ASSERT(ctl->nenabled > 0);
555
556	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
557		return;
558
559	for (; fbt != NULL; fbt = fbt->fbtp_next)
560		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
561}
562
563static void
564fbt_resume(void *arg, dtrace_id_t id, void *parg)
565{
566	fbt_probe_t *fbt = parg;
567	modctl_t *ctl = fbt->fbtp_ctl;
568
569	ASSERT(ctl->nenabled > 0);
570
571	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
572		return;
573
574	for (; fbt != NULL; fbt = fbt->fbtp_next)
575		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
576}
577
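/*
 * Build the per-module array that maps an ELF symbol index to the byte
 * offset of that symbol's object or function record in the CTF data;
 * 0xffffffff marks symbols without CTF information.
 */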
578static int
579fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
580{
581	const Elf_Sym *symp = lc->symtab;
582	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
583	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
584	int i;
585	uint32_t *ctfoff;
586	uint32_t objtoff = hp->cth_objtoff;
587	uint32_t funcoff = hp->cth_funcoff;
588	ushort_t info;
589	ushort_t vlen;
590
591	/* Sanity check. */
592	if (hp->cth_magic != CTF_MAGIC) {
593		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
594		return (EINVAL);
595	}
596
597	if (lc->symtab == NULL) {
598		printf("No symbol table in '%s'\n",lf->pathname);
599		return (EINVAL);
600	}
601
602	if ((ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK)) == NULL)
603		return (ENOMEM);
604
605	*lc->ctfoffp = ctfoff;
606
607	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
608		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
609			*ctfoff = 0xffffffff;
610			continue;
611		}
612
613		switch (ELF_ST_TYPE(symp->st_info)) {
614		case STT_OBJECT:
615			if (objtoff >= hp->cth_funcoff ||
616			    (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
617				*ctfoff = 0xffffffff;
618				break;
619			}
620
621			*ctfoff = objtoff;
622			objtoff += sizeof (ushort_t);
623			break;
624
625		case STT_FUNC:
626			if (funcoff >= hp->cth_typeoff) {
627				*ctfoff = 0xffffffff;
628				break;
629			}
630
631			*ctfoff = funcoff;
632
633			info = *((const ushort_t *)(ctfdata + funcoff));
634			vlen = CTF_INFO_VLEN(info);
635
636			/*
637			 * If we encounter a zero pad at the end, just skip it.
638			 * Otherwise skip over the function and its return type
639			 * (+2) and the argument list (vlen).
640			 */
641			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
642				funcoff += sizeof (ushort_t); /* skip pad */
643			else
644				funcoff += sizeof (ushort_t) * (vlen + 2);
645			break;
646
647		default:
648			*ctfoff = 0xffffffff;
649			break;
650		}
651	}
652
653	return (0);
654}
655
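/*
 * Return the size of a CTF type and, optionally, the number of bytes
 * occupied by its header (ctf_type_t for large types, ctf_stype_t
 * otherwise).
 */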
656static ssize_t
657fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
658    ssize_t *incrementp)
659{
660	ssize_t size, increment;
661
662	if (version > CTF_VERSION_1 &&
663	    tp->ctt_size == CTF_LSIZE_SENT) {
664		size = CTF_TYPE_LSIZE(tp);
665		increment = sizeof (ctf_type_t);
666	} else {
667		size = tp->ctt_size;
668		increment = sizeof (ctf_stype_t);
669	}
670
671	if (sizep)
672		*sizep = size;
673	if (incrementp)
674		*incrementp = increment;
675
676	return (size);
677}
678
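/*
 * Build the per-module array that maps a CTF type ID to the byte offset of
 * its type record.  Two passes are made over the type section: the first
 * counts the types, the second records the offsets.
 */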
679static int
680fbt_typoff_init(linker_ctf_t *lc)
681{
682	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
683	const ctf_type_t *tbuf;
684	const ctf_type_t *tend;
685	const ctf_type_t *tp;
686	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
687	int ctf_typemax = 0;
688	uint32_t *xp;
689	ulong_t pop[CTF_K_MAX + 1] = { 0 };
690
691
692	/* Sanity check. */
693	if (hp->cth_magic != CTF_MAGIC)
694		return (EINVAL);
695
696	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
697	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
698
699	int child = hp->cth_parname != 0;
700
701	/*
702	 * We make two passes through the entire type section.  In this first
703	 * pass, we count the number of each type and the total number of types.
704	 */
705	for (tp = tbuf; tp < tend; ctf_typemax++) {
706		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
707		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
708		ssize_t size, increment;
709
710		size_t vbytes;
711		uint_t n;
712
713		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
714
715		switch (kind) {
716		case CTF_K_INTEGER:
717		case CTF_K_FLOAT:
718			vbytes = sizeof (uint_t);
719			break;
720		case CTF_K_ARRAY:
721			vbytes = sizeof (ctf_array_t);
722			break;
723		case CTF_K_FUNCTION:
724			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
725			break;
726		case CTF_K_STRUCT:
727		case CTF_K_UNION:
728			if (size < CTF_LSTRUCT_THRESH) {
729				ctf_member_t *mp = (ctf_member_t *)
730				    ((uintptr_t)tp + increment);
731
732				vbytes = sizeof (ctf_member_t) * vlen;
733				for (n = vlen; n != 0; n--, mp++)
734					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
735			} else {
736				ctf_lmember_t *lmp = (ctf_lmember_t *)
737				    ((uintptr_t)tp + increment);
738
739				vbytes = sizeof (ctf_lmember_t) * vlen;
740				for (n = vlen; n != 0; n--, lmp++)
741					child |=
742					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
743			}
744			break;
745		case CTF_K_ENUM:
746			vbytes = sizeof (ctf_enum_t) * vlen;
747			break;
748		case CTF_K_FORWARD:
749			/*
750			 * For forward declarations, ctt_type is the CTF_K_*
751			 * kind for the tag, so bump that population count too.
752			 * If ctt_type is unknown, treat the tag as a struct.
753			 */
754			if (tp->ctt_type == CTF_K_UNKNOWN ||
755			    tp->ctt_type >= CTF_K_MAX)
756				pop[CTF_K_STRUCT]++;
757			else
758				pop[tp->ctt_type]++;
759			/*FALLTHRU*/
760		case CTF_K_UNKNOWN:
761			vbytes = 0;
762			break;
763		case CTF_K_POINTER:
764		case CTF_K_TYPEDEF:
765		case CTF_K_VOLATILE:
766		case CTF_K_CONST:
767		case CTF_K_RESTRICT:
768			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
769			vbytes = 0;
770			break;
771		default:
772			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
773			return (EIO);
774		}
775		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
776		pop[kind]++;
777	}
778
779	/* account for a sentinel value below */
780	ctf_typemax++;
781	*lc->typlenp = ctf_typemax;
782
783	if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER, M_ZERO | M_WAITOK)) == NULL)
784		return (ENOMEM);
785
786	*lc->typoffp = xp;
787
788	/* type id 0 is used as a sentinel value */
789	*xp++ = 0;
790
791	/*
792	 * In the second pass, fill in the type offset.
793	 */
794	for (tp = tbuf; tp < tend; xp++) {
795		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
796		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
797		ssize_t size, increment;
798
799		size_t vbytes;
800		uint_t n;
801
802		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
803
804		switch (kind) {
805		case CTF_K_INTEGER:
806		case CTF_K_FLOAT:
807			vbytes = sizeof (uint_t);
808			break;
809		case CTF_K_ARRAY:
810			vbytes = sizeof (ctf_array_t);
811			break;
812		case CTF_K_FUNCTION:
813			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
814			break;
815		case CTF_K_STRUCT:
816		case CTF_K_UNION:
817			if (size < CTF_LSTRUCT_THRESH) {
818				ctf_member_t *mp = (ctf_member_t *)
819				    ((uintptr_t)tp + increment);
820
821				vbytes = sizeof (ctf_member_t) * vlen;
822				for (n = vlen; n != 0; n--, mp++)
823					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
824			} else {
825				ctf_lmember_t *lmp = (ctf_lmember_t *)
826				    ((uintptr_t)tp + increment);
827
828				vbytes = sizeof (ctf_lmember_t) * vlen;
829				for (n = vlen; n != 0; n--, lmp++)
830					child |=
831					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
832			}
833			break;
834		case CTF_K_ENUM:
835			vbytes = sizeof (ctf_enum_t) * vlen;
836			break;
837		case CTF_K_FORWARD:
838		case CTF_K_UNKNOWN:
839			vbytes = 0;
840			break;
841		case CTF_K_POINTER:
842		case CTF_K_TYPEDEF:
843		case CTF_K_VOLATILE:
844		case CTF_K_CONST:
845		case CTF_K_RESTRICT:
846			vbytes = 0;
847			break;
848		default:
849			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
850			return (EIO);
851		}
852		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
853		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
854	}
855
856	return (0);
857}
858
859/*
860 * CTF Declaration Stack
861 *
862 * In order to implement ctf_type_name(), we must convert a type graph back
863 * into a C type declaration.  Unfortunately, a type graph represents a storage
864 * class ordering of the type whereas a type declaration must obey the C rules
865 * for operator precedence, and the two orderings are frequently in conflict.
866 * For example, consider these CTF type graphs and their C declarations:
867 *
868 * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
869 * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
870 *
871 * In each case, parentheses are used to raise operator * to higher lexical
872 * precedence, so the string form of the C declaration cannot be constructed by
873 * walking the type graph links and forming the string from left to right.
874 *
875 * The functions in this file build a set of stacks from the type graph nodes
876 * corresponding to the C operator precedence levels in the appropriate order.
877 * The code in ctf_type_name() can then iterate over the levels and nodes in
878 * lexical precedence order and construct the final C declaration string.
879 */
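/*
 * As an illustration, consider the first graph above,
 * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER ("int (*)()"):
 * ctf_decl_push() recurses to the base type first, so the integer lands on
 * the CTF_PREC_BASE stack, the function on CTF_PREC_FUNCTION and the pointer
 * on CTF_PREC_POINTER.  Because the pointer node is pushed last, its storage
 * order exceeds its lexical precedence, which is what prompts fbt_type_name()
 * to wrap the "*" in parentheses before appending "()".
 */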
880typedef struct ctf_list {
881	struct ctf_list *l_prev; /* previous pointer or tail pointer */
882	struct ctf_list *l_next; /* next pointer or head pointer */
883} ctf_list_t;
884
885#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
886#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
887
888typedef enum {
889	CTF_PREC_BASE,
890	CTF_PREC_POINTER,
891	CTF_PREC_ARRAY,
892	CTF_PREC_FUNCTION,
893	CTF_PREC_MAX
894} ctf_decl_prec_t;
895
896typedef struct ctf_decl_node {
897	ctf_list_t cd_list;			/* linked list pointers */
898	ctf_id_t cd_type;			/* type identifier */
899	uint_t cd_kind;				/* type kind */
900	uint_t cd_n;				/* type dimension if array */
901} ctf_decl_node_t;
902
903typedef struct ctf_decl {
904	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
905	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
906	ctf_decl_prec_t cd_qualp;		/* qualifier precedence */
907	ctf_decl_prec_t cd_ordp;		/* ordered precedence */
908	char *cd_buf;				/* buffer for output */
909	char *cd_ptr;				/* buffer location */
910	char *cd_end;				/* buffer limit */
911	size_t cd_len;				/* buffer space required */
912	int cd_err;				/* saved error value */
913} ctf_decl_t;
914
915/*
916 * Simple doubly-linked list append routine.  This implementation assumes that
917 * each list element contains an embedded ctf_list_t as the first member.
918 * An additional ctf_list_t is used to store the head (l_next) and tail
919 * (l_prev) pointers.  The current head and tail list elements have their
920 * previous and next pointers set to NULL, respectively.
921 */
922static void
923ctf_list_append(ctf_list_t *lp, void *new)
924{
925	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
926	ctf_list_t *q = new;		/* q = new list element */
927
928	lp->l_prev = q;
929	q->l_prev = p;
930	q->l_next = NULL;
931
932	if (p != NULL)
933		p->l_next = q;
934	else
935		lp->l_next = q;
936}
937
938/*
939 * Prepend the specified existing element to the given ctf_list_t.  The
940 * existing pointer should be pointing at a struct with embedded ctf_list_t.
941 */
942static void
943ctf_list_prepend(ctf_list_t *lp, void *new)
944{
945	ctf_list_t *p = new;		/* p = new list element */
946	ctf_list_t *q = lp->l_next;	/* q = head list element */
947
948	lp->l_next = p;
949	p->l_prev = NULL;
950	p->l_next = q;
951
952	if (q != NULL)
953		q->l_prev = p;
954	else
955		lp->l_prev = p;
956}
957
958static void
959ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
960{
961	int i;
962
963	bzero(cd, sizeof (ctf_decl_t));
964
965	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
966		cd->cd_order[i] = CTF_PREC_BASE - 1;
967
968	cd->cd_qualp = CTF_PREC_BASE;
969	cd->cd_ordp = CTF_PREC_BASE;
970
971	cd->cd_buf = buf;
972	cd->cd_ptr = buf;
973	cd->cd_end = buf + len;
974}
975
976static void
977ctf_decl_fini(ctf_decl_t *cd)
978{
979	ctf_decl_node_t *cdp, *ndp;
980	int i;
981
982	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
983		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
984		    cdp != NULL; cdp = ndp) {
985			ndp = ctf_list_next(cdp);
986			free(cdp, M_FBT);
987		}
988	}
989}
990
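/*
 * Translate a CTF type ID into a pointer to its type record, using the
 * offset array built by fbt_typoff_init().
 */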
991static const ctf_type_t *
992ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
993{
994	const ctf_type_t *tp;
995	uint32_t offset;
996	uint32_t *typoff = *lc->typoffp;
997
998	if (type >= *lc->typlenp) {
999		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
1000		return(NULL);
1001	}
1002
1003	/* Check if the type isn't cross-referenced. */
1004	if ((offset = typoff[type]) == 0) {
1005		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
1006		return(NULL);
1007	}
1008
1009	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
1010
1011	return (tp);
1012}
1013
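/*
 * Extract the contents type, index type and element count of an array type.
 */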
1014static void
1015fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
1016{
1017	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
1018	const ctf_type_t *tp;
1019	const ctf_array_t *ap;
1020	ssize_t increment;
1021
1022	bzero(arp, sizeof(*arp));
1023
1024	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
1025		return;
1026
1027	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
1028		return;
1029
1030	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
1031
1032	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
1033	arp->ctr_contents = ap->cta_contents;
1034	arp->ctr_index = ap->cta_index;
1035	arp->ctr_nelems = ap->cta_nelems;
1036}
1037
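/*
 * Return the string at the given offset into the CTF string table, or an
 * empty string if the offset is out of range.
 */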
1038static const char *
1039ctf_strptr(linker_ctf_t *lc, int name)
1040{
1041	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
1042	const char *strp = "";
1043
1044	if (name < 0 || name >= hp->cth_strlen)
1045		return(strp);
1046
1047	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
1048
1049	return (strp);
1050}
1051
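/*
 * Recursively push the type graph rooted at 'type' onto the declaration
 * stacks, one node per lexical precedence level, following anonymous
 * typedefs and noting qualifiers along the way.
 */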
1052static void
1053ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
1054{
1055	ctf_decl_node_t *cdp;
1056	ctf_decl_prec_t prec;
1057	uint_t kind, n = 1;
1058	int is_qual = 0;
1059
1060	const ctf_type_t *tp;
1061	ctf_arinfo_t ar;
1062
1063	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
1064		cd->cd_err = ENOENT;
1065		return;
1066	}
1067
1068	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
1069	case CTF_K_ARRAY:
1070		fbt_array_info(lc, type, &ar);
1071		ctf_decl_push(cd, lc, ar.ctr_contents);
1072		n = ar.ctr_nelems;
1073		prec = CTF_PREC_ARRAY;
1074		break;
1075
1076	case CTF_K_TYPEDEF:
1077		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
1078			ctf_decl_push(cd, lc, tp->ctt_type);
1079			return;
1080		}
1081		prec = CTF_PREC_BASE;
1082		break;
1083
1084	case CTF_K_FUNCTION:
1085		ctf_decl_push(cd, lc, tp->ctt_type);
1086		prec = CTF_PREC_FUNCTION;
1087		break;
1088
1089	case CTF_K_POINTER:
1090		ctf_decl_push(cd, lc, tp->ctt_type);
1091		prec = CTF_PREC_POINTER;
1092		break;
1093
1094	case CTF_K_VOLATILE:
1095	case CTF_K_CONST:
1096	case CTF_K_RESTRICT:
1097		ctf_decl_push(cd, lc, tp->ctt_type);
1098		prec = cd->cd_qualp;
1099		is_qual++;
1100		break;
1101
1102	default:
1103		prec = CTF_PREC_BASE;
1104	}
1105
1106	if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) {
1107		cd->cd_err = EAGAIN;
1108		return;
1109	}
1110
1111	cdp->cd_type = type;
1112	cdp->cd_kind = kind;
1113	cdp->cd_n = n;
1114
1115	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
1116		cd->cd_order[prec] = cd->cd_ordp++;
1117
1118	/*
1119	 * Reset cd_qualp to the highest precedence level that we've seen so
1120	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
1121	 */
1122	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
1123		cd->cd_qualp = prec;
1124
1125	/*
1126	 * C array declarators are ordered inside out so prepend them.  Also by
1127	 * convention qualifiers of base types precede the type specifier (e.g.
1128	 * const int vs. int const) even though the two forms are equivalent.
1129	 */
1130	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
1131		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
1132	else
1133		ctf_list_append(&cd->cd_nodes[prec], cdp);
1134}
1135
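/*
 * Append formatted text to the declaration buffer, accumulating in cd_len
 * the total length required even if the buffer is too small to hold it.
 */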
1136static void
1137ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
1138{
1139	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
1140	va_list ap;
1141	size_t n;
1142
1143	va_start(ap, format);
1144	n = vsnprintf(cd->cd_ptr, len, format, ap);
1145	va_end(ap);
1146
1147	cd->cd_ptr += MIN(n, len);
1148	cd->cd_len += n;
1149}
1150
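/*
 * Render the C declaration for a CTF type ID into 'buf'.  Returns the
 * length required for the full declaration, or -1 on error.
 */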
1151static ssize_t
1152fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
1153{
1154	ctf_decl_t cd;
1155	ctf_decl_node_t *cdp;
1156	ctf_decl_prec_t prec, lp, rp;
1157	int ptr, arr;
1158	uint_t k;
1159
1160	if (lc == NULL && type == CTF_ERR)
1161		return (-1); /* simplify caller code by permitting CTF_ERR */
1162
1163	ctf_decl_init(&cd, buf, len);
1164	ctf_decl_push(&cd, lc, type);
1165
1166	if (cd.cd_err != 0) {
1167		ctf_decl_fini(&cd);
1168		return (-1);
1169	}
1170
1171	/*
1172	 * If the type graph's order conflicts with lexical precedence order
1173	 * for pointers or arrays, then we need to surround the declarations at
1174	 * the corresponding lexical precedence with parentheses.  This can
1175	 * result in either a parenthesized pointer (*) as in int (*)() or
1176	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
1177	 */
1178	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
1179	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
1180
1181	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
1182	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
1183
1184	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
1185
1186	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
1187		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
1188		    cdp != NULL; cdp = ctf_list_next(cdp)) {
1189
1190			const ctf_type_t *tp =
1191			    ctf_lookup_by_id(lc, cdp->cd_type);
1192			const char *name = ctf_strptr(lc, tp->ctt_name);
1193
1194			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
1195				ctf_decl_sprintf(&cd, " ");
1196
1197			if (lp == prec) {
1198				ctf_decl_sprintf(&cd, "(");
1199				lp = -1;
1200			}
1201
1202			switch (cdp->cd_kind) {
1203			case CTF_K_INTEGER:
1204			case CTF_K_FLOAT:
1205			case CTF_K_TYPEDEF:
1206				ctf_decl_sprintf(&cd, "%s", name);
1207				break;
1208			case CTF_K_POINTER:
1209				ctf_decl_sprintf(&cd, "*");
1210				break;
1211			case CTF_K_ARRAY:
1212				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
1213				break;
1214			case CTF_K_FUNCTION:
1215				ctf_decl_sprintf(&cd, "()");
1216				break;
1217			case CTF_K_STRUCT:
1218			case CTF_K_FORWARD:
1219				ctf_decl_sprintf(&cd, "struct %s", name);
1220				break;
1221			case CTF_K_UNION:
1222				ctf_decl_sprintf(&cd, "union %s", name);
1223				break;
1224			case CTF_K_ENUM:
1225				ctf_decl_sprintf(&cd, "enum %s", name);
1226				break;
1227			case CTF_K_VOLATILE:
1228				ctf_decl_sprintf(&cd, "volatile");
1229				break;
1230			case CTF_K_CONST:
1231				ctf_decl_sprintf(&cd, "const");
1232				break;
1233			case CTF_K_RESTRICT:
1234				ctf_decl_sprintf(&cd, "restrict");
1235				break;
1236			}
1237
1238			k = cdp->cd_kind;
1239		}
1240
1241		if (rp == prec)
1242			ctf_decl_sprintf(&cd, ")");
1243	}
1244
1245	ctf_decl_fini(&cd);
1246	return (cd.cd_len);
1247}
1248
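/*
 * DTrace argument description entry point: consult the module's CTF data to
 * find the native C type of the requested probe argument.
 */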
1249static void
1250fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1251{
1252	const ushort_t *dp;
1253	fbt_probe_t *fbt = parg;
1254	linker_ctf_t lc;
1255	modctl_t *ctl = fbt->fbtp_ctl;
1256	int ndx = desc->dtargd_ndx;
1257	int symindx = fbt->fbtp_symindx;
1258	uint32_t *ctfoff;
1259	uint32_t offset;
1260	ushort_t info, kind, n;
1261
1262	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1263		(void) strcpy(desc->dtargd_native, "int");
1264		return;
1265	}
1266
1267	desc->dtargd_ndx = DTRACE_ARGNONE;
1268
1269	/* Get a pointer to the CTF data and its length. */
1270	if (linker_ctf_get(ctl, &lc) != 0)
1271		/* No CTF data? Something wrong? *shrug* */
1272		return;
1273
1274	/* Check if this module hasn't been initialised yet. */
1275	if (*lc.ctfoffp == NULL) {
1276		/*
1277		 * Initialise the CTF object and function symindx to
1278		 * byte offset array.
1279		 */
1280		if (fbt_ctfoff_init(ctl, &lc) != 0)
1281			return;
1282
1283		/* Initialise the CTF type to byte offset array. */
1284		if (fbt_typoff_init(&lc) != 0)
1285			return;
1286	}
1287
1288	ctfoff = *lc.ctfoffp;
1289
1290	if (ctfoff == NULL || *lc.typoffp == NULL)
1291		return;
1292
1293	/* Check if the symbol index is out of range. */
1294	if (symindx >= lc.nsym)
1295		return;
1296
1297	/* Check if the symbol isn't cross-referenced. */
1298	if ((offset = ctfoff[symindx]) == 0xffffffff)
1299		return;
1300
1301	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1302
1303	info = *dp++;
1304	kind = CTF_INFO_KIND(info);
1305	n = CTF_INFO_VLEN(info);
1306
1307	if (kind == CTF_K_UNKNOWN && n == 0) {
1308		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1309		return;
1310	}
1311
1312	if (kind != CTF_K_FUNCTION) {
1313		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1314		return;
1315	}
1316
1317	if (fbt->fbtp_roffset != 0) {
1318		/* Only return type is available for args[1] in return probe. */
1319		if (ndx > 1)
1320			return;
1321		ASSERT(ndx == 1);
1322	} else {
1323		/* Check if the requested argument doesn't exist. */
1324		if (ndx >= n)
1325			return;
1326
1327		/* Skip the return type and arguments up to the one requested. */
1328		dp += ndx + 1;
1329	}
1330
1331	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1332		desc->dtargd_ndx = ndx;
1333
1334	return;
1335}
1336
1337static int
1338fbt_linker_file_cb(linker_file_t lf, void *arg)
1339{
1340
1341	fbt_provide_module(arg, lf);
1342
1343	return (0);
1344}
1345
1346static void
1347fbt_load(void *dummy)
1348{
1349	/* Create the /dev/dtrace/fbt entry. */
1350	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1351	    "dtrace/fbt");
1352
1353	/* Default the probe table size if not specified. */
1354	if (fbt_probetab_size == 0)
1355		fbt_probetab_size = FBT_PROBETAB_SIZE;
1356
1357	/* Choose the hash mask for the probe table. */
1358	fbt_probetab_mask = fbt_probetab_size - 1;
1359
1360	/* Allocate memory for the probe table. */
1361	fbt_probetab =
1362	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1363
1364	dtrace_doubletrap_func = fbt_doubletrap;
1365	dtrace_invop_add(fbt_invop);
1366
1367	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1368	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1369		return;
1370
1371	/* Create probes for the kernel and already-loaded modules. */
1372	linker_file_foreach(fbt_linker_file_cb, NULL);
1373}
1374
1375static int
1376fbt_unload(void)
1377{
1378	int error = 0;
1379
1380	/* De-register the invalid opcode handler. */
1381	dtrace_invop_remove(fbt_invop);
1382
1383	dtrace_doubletrap_func = NULL;
1384
1385	/* De-register this DTrace provider. */
1386	if ((error = dtrace_unregister(fbt_id)) != 0)
1387		return (error);
1388
1389	/* Free the probe table. */
1390	free(fbt_probetab, M_FBT);
1391	fbt_probetab = NULL;
1392	fbt_probetab_mask = 0;
1393
1394	destroy_dev(fbt_cdev);
1395
1396	return (error);
1397}
1398
1399static int
1400fbt_modevent(module_t mod __unused, int type, void *data __unused)
1401{
1402	int error = 0;
1403
1404	switch (type) {
1405	case MOD_LOAD:
1406		break;
1407
1408	case MOD_UNLOAD:
1409		break;
1410
1411	case MOD_SHUTDOWN:
1412		break;
1413
1414	default:
1415		error = EOPNOTSUPP;
1416		break;
1417
1418	}
1419
1420	return (error);
1421}
1422
1423static int
1424fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1425{
1426	return (0);
1427}
1428
1429SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
1430SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);
1431
1432DEV_MODULE(fbt, fbt_modevent, NULL);
1433MODULE_VERSION(fbt, 1);
1434MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
1435MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1436