1179237Sjb/*
2179237Sjb * CDDL HEADER START
3179237Sjb *
4179237Sjb * The contents of this file are subject to the terms of the
5179237Sjb * Common Development and Distribution License (the "License").
6179237Sjb * You may not use this file except in compliance with the License.
7179237Sjb *
8179237Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9179237Sjb * or http://www.opensolaris.org/os/licensing.
10179237Sjb * See the License for the specific language governing permissions
11179237Sjb * and limitations under the License.
12179237Sjb *
13179237Sjb * When distributing Covered Code, include this CDDL HEADER in each
14179237Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15179237Sjb * If applicable, add the following below this CDDL HEADER, with the
16179237Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17179237Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18179237Sjb *
19179237Sjb * CDDL HEADER END
20179237Sjb *
21179237Sjb * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22179237Sjb *
23179237Sjb * $FreeBSD: releng/10.2/sys/cddl/dev/fbt/fbt.c 282748 2015-05-11 07:54:39Z avg $
24179237Sjb *
25179237Sjb */
26179237Sjb
27179237Sjb/*
28179237Sjb * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29179237Sjb * Use is subject to license terms.
30179237Sjb */
31179237Sjb
32179237Sjb#include <sys/cdefs.h>
33179237Sjb#include <sys/param.h>
34179237Sjb#include <sys/systm.h>
35179237Sjb#include <sys/conf.h>
36179237Sjb#include <sys/cpuvar.h>
37179237Sjb#include <sys/fcntl.h>
38179237Sjb#include <sys/filio.h>
39179237Sjb#include <sys/kdb.h>
40179237Sjb#include <sys/kernel.h>
41179237Sjb#include <sys/kmem.h>
42179237Sjb#include <sys/kthread.h>
43179237Sjb#include <sys/limits.h>
44179237Sjb#include <sys/linker.h>
45179237Sjb#include <sys/lock.h>
46179237Sjb#include <sys/malloc.h>
47179237Sjb#include <sys/module.h>
48179237Sjb#include <sys/mutex.h>
49179237Sjb#include <sys/pcpu.h>
50179237Sjb#include <sys/poll.h>
51179237Sjb#include <sys/proc.h>
52179237Sjb#include <sys/selinfo.h>
53179237Sjb#include <sys/smp.h>
54179237Sjb#include <sys/syscall.h>
55179237Sjb#include <sys/sysent.h>
56179237Sjb#include <sys/sysproto.h>
57179237Sjb#include <sys/uio.h>
58179237Sjb#include <sys/unistd.h>
59179237Sjb#include <machine/stdarg.h>
60179237Sjb
61179237Sjb#include <sys/dtrace.h>
62179237Sjb#include <sys/dtrace_bsd.h>
63179237Sjb
/* Malloc type for this provider's allocations (e.g. each fbt_probe_t). */
static MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");
65179237Sjb
/*
 * x86 opcode bytes matched while looking for a function prologue.
 * 0x55 is "push %ebp"; 0x8b 0xec and 0x89 0xe5 are the two alternative
 * two-byte encodings of "mov %esp,%ebp".  FBT_REX_RSP_RBP is not referenced
 * in the visible part of this file; 0x48 is the REX.W prefix byte.
 */
#define	FBT_PUSHL_EBP		0x55
#define	FBT_MOVL_ESP_EBP0_V0	0x8b
#define	FBT_MOVL_ESP_EBP1_V0	0xec
#define	FBT_MOVL_ESP_EBP0_V1	0x89
#define	FBT_MOVL_ESP_EBP1_V1	0xe5
#define	FBT_REX_RSP_RBP		0x48

/*
 * x86 opcode bytes matched while looking for function return sites:
 * "pop %ebp", "ret", "ret imm16" and "leave".
 */
#define	FBT_POPL_EBP		0x5d
#define	FBT_RET			0xc3
#define	FBT_RET_IMM16		0xc2
#define	FBT_LEAVE		0xc9

/*
 * Byte stored over the first byte of an instrumented instruction when a
 * probe is enabled.  0xcc is the int3 breakpoint opcode; 0xf0 is the LOCK
 * prefix byte.
 */
#ifdef __amd64__
#define	FBT_PATCHVAL		0xcc
#else
#define	FBT_PATCHVAL		0xf0
#endif
83179237Sjb
/* Forward declarations: device open routine and the provider callbacks. */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);

/* Probe-name strings handed to dtrace_probe_create(). */
#define	FBT_ENTRY	"entry"
#define	FBT_RETURN	"return"
/*
 * Map a patch-point address to an fbt_probetab bucket index: discard the
 * low four address bits, then apply fbt_probetab_mask.
 */
#define	FBT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
#define	FBT_PROBETAB_SIZE	0x8000		/* 32k entries -- 128K total */
99179237Sjb
100179237Sjbstatic struct cdevsw fbt_cdevsw = {
101179237Sjb	.d_version	= D_VERSION,
102179237Sjb	.d_open		= fbt_open,
103179237Sjb	.d_name		= "fbt",
104179237Sjb};
105179237Sjb
106179237Sjbstatic dtrace_pattr_t fbt_attr = {
107179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
108179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
109179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
110179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
111179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
112179237Sjb};
113179237Sjb
114179237Sjbstatic dtrace_pops_t fbt_pops = {
115179237Sjb	NULL,
116179237Sjb	fbt_provide_module,
117179237Sjb	fbt_enable,
118179237Sjb	fbt_disable,
119179237Sjb	fbt_suspend,
120179237Sjb	fbt_resume,
121179237Sjb	fbt_getargdesc,
122179237Sjb	NULL,
123179237Sjb	NULL,
124179237Sjb	fbt_destroy
125179237Sjb};
126179237Sjb
/*
 * Per-patch-point bookkeeping.  One record exists for a function's entry
 * site and one for every instrumented return site; all records live in the
 * fbt_probetab hash, keyed by patch-point address.
 */
typedef struct fbt_probe {
	struct fbt_probe *fbtp_hashnext;	/* next record in hash bucket */
	uint8_t		*fbtp_patchpoint;	/* address of the patched byte */
	int8_t		fbtp_rval;		/* DTRACE_INVOP_* from fbt_invop() */
	uint8_t		fbtp_patchval;		/* byte written when enabled */
	uint8_t		fbtp_savedval;		/* original byte at patchpoint */
	uintptr_t	fbtp_roffset;		/* 0 for entry; else return offset */
	dtrace_id_t	fbtp_id;		/* id from dtrace_probe_create() */
	const char	*fbtp_name;		/* function (symbol) name */
	modctl_t	*fbtp_ctl;		/* linker file owning the function */
	int		fbtp_loadcnt;		/* lf->loadcnt at creation time */
	int		fbtp_primary;		/* not referenced in the visible code */
	int		fbtp_invop_cnt;		/* times fbt_invop() matched this */
	int		fbtp_symindx;		/* symbol index given to callback */
	struct fbt_probe *fbtp_next;		/* next return site, same probe id */
} fbt_probe_t;
143179237Sjb
/* Provider-wide state. */
static struct cdev		*fbt_cdev;
static dtrace_provider_id_t	fbt_id;		/* passed to dtrace_probe_create() */
static fbt_probe_t		**fbt_probetab;	/* hash of records by patch address */
static int			fbt_probetab_size;	/* number of hash buckets */
static int			fbt_probetab_mask;	/* mask used by FBT_ADDR2NDX() */
static int			fbt_verbose = 0;	/* non-zero: print diagnostics */
150179237Sjb
151179237Sjbstatic void
152179237Sjbfbt_doubletrap(void)
153179237Sjb{
154179237Sjb	fbt_probe_t *fbt;
155179237Sjb	int i;
156179237Sjb
157179237Sjb	for (i = 0; i < fbt_probetab_size; i++) {
158179237Sjb		fbt = fbt_probetab[i];
159179237Sjb
160179237Sjb		for (; fbt != NULL; fbt = fbt->fbtp_next)
161179237Sjb			*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
162179237Sjb	}
163179237Sjb}
164179237Sjb
165179237Sjbstatic int
166179237Sjbfbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
167179237Sjb{
168179237Sjb	solaris_cpu_t *cpu = &solaris_cpu[curcpu];
169179237Sjb	uintptr_t stack0, stack1, stack2, stack3, stack4;
170179237Sjb	fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
171179237Sjb
172179237Sjb	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
173179237Sjb		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
174179237Sjb			fbt->fbtp_invop_cnt++;
175179237Sjb			if (fbt->fbtp_roffset == 0) {
176179237Sjb				int i = 0;
177179237Sjb				/*
178179237Sjb				 * When accessing the arguments on the stack,
179179237Sjb				 * we must protect against accessing beyond
180179237Sjb				 * the stack.  We can safely set NOFAULT here
181179237Sjb				 * -- we know that interrupts are already
182179237Sjb				 * disabled.
183179237Sjb				 */
184179237Sjb				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
185179237Sjb				cpu->cpu_dtrace_caller = stack[i++];
186179237Sjb				stack0 = stack[i++];
187179237Sjb				stack1 = stack[i++];
188179237Sjb				stack2 = stack[i++];
189179237Sjb				stack3 = stack[i++];
190179237Sjb				stack4 = stack[i++];
191179237Sjb				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
192179237Sjb				    CPU_DTRACE_BADADDR);
193179237Sjb
194179237Sjb				dtrace_probe(fbt->fbtp_id, stack0, stack1,
195179237Sjb				    stack2, stack3, stack4);
196179237Sjb
197179237Sjb				cpu->cpu_dtrace_caller = 0;
198179237Sjb			} else {
199179237Sjb#ifdef __amd64__
200179237Sjb				/*
201179237Sjb				 * On amd64, we instrument the ret, not the
202179237Sjb				 * leave.  We therefore need to set the caller
203179237Sjb				 * to assure that the top frame of a stack()
204179237Sjb				 * action is correct.
205179237Sjb				 */
206179237Sjb				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
207179237Sjb				cpu->cpu_dtrace_caller = stack[0];
208179237Sjb				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
209179237Sjb				    CPU_DTRACE_BADADDR);
210179237Sjb#endif
211179237Sjb
212179237Sjb				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
213179237Sjb				    rval, 0, 0, 0);
214179237Sjb				cpu->cpu_dtrace_caller = 0;
215179237Sjb			}
216179237Sjb
217179237Sjb			return (fbt->fbtp_rval);
218179237Sjb		}
219179237Sjb	}
220179237Sjb
221179237Sjb	return (0);
222179237Sjb}
223179237Sjb
224179237Sjbstatic int
225179237Sjbfbt_provide_module_function(linker_file_t lf, int symindx,
226179237Sjb    linker_symval_t *symval, void *opaque)
227179237Sjb{
228179237Sjb	char *modname = opaque;
229179237Sjb	const char *name = symval->name;
230179237Sjb	fbt_probe_t *fbt, *retfbt;
231179237Sjb	int j;
232179237Sjb	int size;
233179237Sjb	u_int8_t *instr, *limit;
234179237Sjb
235179237Sjb	if (strncmp(name, "dtrace_", 7) == 0 &&
236179237Sjb	    strncmp(name, "dtrace_safe_", 12) != 0) {
237179237Sjb		/*
238179237Sjb		 * Anything beginning with "dtrace_" may be called
239179237Sjb		 * from probe context unless it explicitly indicates
240179237Sjb		 * that it won't be called from probe context by
241179237Sjb		 * using the prefix "dtrace_safe_".
242179237Sjb		 */
243179237Sjb		return (0);
244179237Sjb	}
245179237Sjb
246179237Sjb	if (name[0] == '_' && name[1] == '_')
247179237Sjb		return (0);
248179237Sjb
249179237Sjb	size = symval->size;
250179237Sjb
251179237Sjb	instr = (u_int8_t *) symval->value;
252179237Sjb	limit = (u_int8_t *) symval->value + symval->size;
253179237Sjb
254179237Sjb#ifdef __amd64__
255179237Sjb	while (instr < limit) {
256179237Sjb		if (*instr == FBT_PUSHL_EBP)
257179237Sjb			break;
258179237Sjb
259179237Sjb		if ((size = dtrace_instr_size(instr)) <= 0)
260179237Sjb			break;
261179237Sjb
262179237Sjb		instr += size;
263179237Sjb	}
264179237Sjb
265179237Sjb	if (instr >= limit || *instr != FBT_PUSHL_EBP) {
266179237Sjb		/*
267179237Sjb		 * We either don't save the frame pointer in this
268179237Sjb		 * function, or we ran into some disassembly
269179237Sjb		 * screw-up.  Either way, we bail.
270179237Sjb		 */
271179237Sjb		return (0);
272179237Sjb	}
273179237Sjb#else
274179237Sjb	if (instr[0] != FBT_PUSHL_EBP)
275179237Sjb		return (0);
276179237Sjb
277179237Sjb	if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 &&
278179237Sjb	    instr[2] == FBT_MOVL_ESP_EBP1_V0) &&
279179237Sjb	    !(instr[1] == FBT_MOVL_ESP_EBP0_V1 &&
280179237Sjb	    instr[2] == FBT_MOVL_ESP_EBP1_V1))
281179237Sjb		return (0);
282179237Sjb#endif
283179237Sjb
284179237Sjb	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
285179237Sjb	fbt->fbtp_name = name;
286179237Sjb	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
287179237Sjb	    name, FBT_ENTRY, 3, fbt);
288179237Sjb	fbt->fbtp_patchpoint = instr;
289179237Sjb	fbt->fbtp_ctl = lf;
290179237Sjb	fbt->fbtp_loadcnt = lf->loadcnt;
291179237Sjb	fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP;
292179237Sjb	fbt->fbtp_savedval = *instr;
293179237Sjb	fbt->fbtp_patchval = FBT_PATCHVAL;
294179237Sjb	fbt->fbtp_symindx = symindx;
295179237Sjb
296179237Sjb	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
297179237Sjb	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
298179237Sjb
299179237Sjb	lf->fbt_nentries++;
300179237Sjb
301179237Sjb	retfbt = NULL;
302179237Sjbagain:
303179237Sjb	if (instr >= limit)
304179237Sjb		return (0);
305179237Sjb
306179237Sjb	/*
307179237Sjb	 * If this disassembly fails, then we've likely walked off into
308179237Sjb	 * a jump table or some other unsuitable area.  Bail out of the
309179237Sjb	 * disassembly now.
310179237Sjb	 */
311179237Sjb	if ((size = dtrace_instr_size(instr)) <= 0)
312179237Sjb		return (0);
313179237Sjb
314179237Sjb#ifdef __amd64__
315179237Sjb	/*
316179237Sjb	 * We only instrument "ret" on amd64 -- we don't yet instrument
317179237Sjb	 * ret imm16, largely because the compiler doesn't seem to
318179237Sjb	 * (yet) emit them in the kernel...
319179237Sjb	 */
320179237Sjb	if (*instr != FBT_RET) {
321179237Sjb		instr += size;
322179237Sjb		goto again;
323179237Sjb	}
324179237Sjb#else
325179237Sjb	if (!(size == 1 &&
326179237Sjb	    (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) &&
327179237Sjb	    (*(instr + 1) == FBT_RET ||
328179237Sjb	    *(instr + 1) == FBT_RET_IMM16))) {
329179237Sjb		instr += size;
330179237Sjb		goto again;
331179237Sjb	}
332179237Sjb#endif
333179237Sjb
334179237Sjb	/*
335179237Sjb	 * We (desperately) want to avoid erroneously instrumenting a
336179237Sjb	 * jump table, especially given that our markers are pretty
337179237Sjb	 * short:  two bytes on x86, and just one byte on amd64.  To
338179237Sjb	 * determine if we're looking at a true instruction sequence
339179237Sjb	 * or an inline jump table that happens to contain the same
340179237Sjb	 * byte sequences, we resort to some heuristic sleeze:  we
341179237Sjb	 * treat this instruction as being contained within a pointer,
342179237Sjb	 * and see if that pointer points to within the body of the
343179237Sjb	 * function.  If it does, we refuse to instrument it.
344179237Sjb	 */
345179237Sjb	for (j = 0; j < sizeof (uintptr_t); j++) {
346179237Sjb		caddr_t check = (caddr_t) instr - j;
347179237Sjb		uint8_t *ptr;
348179237Sjb
349179237Sjb		if (check < symval->value)
350179237Sjb			break;
351179237Sjb
352179237Sjb		if (check + sizeof (caddr_t) > (caddr_t)limit)
353179237Sjb			continue;
354179237Sjb
355179237Sjb		ptr = *(uint8_t **)check;
356179237Sjb
357179237Sjb		if (ptr >= (uint8_t *) symval->value && ptr < limit) {
358179237Sjb			instr += size;
359179237Sjb			goto again;
360179237Sjb		}
361179237Sjb	}
362179237Sjb
363179237Sjb	/*
364179237Sjb	 * We have a winner!
365179237Sjb	 */
366179237Sjb	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
367179237Sjb	fbt->fbtp_name = name;
368179237Sjb
369179237Sjb	if (retfbt == NULL) {
370179237Sjb		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
371179237Sjb		    name, FBT_RETURN, 3, fbt);
372179237Sjb	} else {
373179237Sjb		retfbt->fbtp_next = fbt;
374179237Sjb		fbt->fbtp_id = retfbt->fbtp_id;
375179237Sjb	}
376179237Sjb
377179237Sjb	retfbt = fbt;
378179237Sjb	fbt->fbtp_patchpoint = instr;
379179237Sjb	fbt->fbtp_ctl = lf;
380179237Sjb	fbt->fbtp_loadcnt = lf->loadcnt;
381179237Sjb	fbt->fbtp_symindx = symindx;
382179237Sjb
383179237Sjb#ifndef __amd64__
384179237Sjb	if (*instr == FBT_POPL_EBP) {
385179237Sjb		fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
386179237Sjb	} else {
387179237Sjb		ASSERT(*instr == FBT_LEAVE);
388179237Sjb		fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
389179237Sjb	}
390179237Sjb	fbt->fbtp_roffset =
391179237Sjb	    (uintptr_t)(instr - (uint8_t *) symval->value) + 1;
392179237Sjb
393179237Sjb#else
394179237Sjb	ASSERT(*instr == FBT_RET);
395179237Sjb	fbt->fbtp_rval = DTRACE_INVOP_RET;
396179237Sjb	fbt->fbtp_roffset =
397179237Sjb	    (uintptr_t)(instr - (uint8_t *) symval->value);
398179237Sjb#endif
399179237Sjb
400179237Sjb	fbt->fbtp_savedval = *instr;
401179237Sjb	fbt->fbtp_patchval = FBT_PATCHVAL;
402179237Sjb	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
403179237Sjb	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
404179237Sjb
405179237Sjb	lf->fbt_nentries++;
406179237Sjb
407179237Sjb	instr += size;
408179237Sjb	goto again;
409179237Sjb}
410179237Sjb
411179237Sjbstatic void
412179237Sjbfbt_provide_module(void *arg, modctl_t *lf)
413179237Sjb{
414179237Sjb	char modname[MAXPATHLEN];
415179237Sjb	int i;
416179237Sjb	size_t len;
417179237Sjb
418179237Sjb	strlcpy(modname, lf->filename, sizeof(modname));
419179237Sjb	len = strlen(modname);
420179237Sjb	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
421179237Sjb		modname[len - 3] = '\0';
422179237Sjb
423179237Sjb	/*
424179237Sjb	 * Employees of dtrace and their families are ineligible.  Void
425179237Sjb	 * where prohibited.
426179237Sjb	 */
427179237Sjb	if (strcmp(modname, "dtrace") == 0)
428179237Sjb		return;
429179237Sjb
430179237Sjb	/*
431179237Sjb	 * To register with DTrace, a module must list 'dtrace' as a
432179237Sjb	 * dependency in order for the kernel linker to resolve
433179237Sjb	 * symbols like dtrace_register(). All modules with such a
434179237Sjb	 * dependency are ineligible for FBT tracing.
435179237Sjb	 */
436179237Sjb	for (i = 0; i < lf->ndeps; i++)
437179237Sjb		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
438179237Sjb			return;
439179237Sjb
440179237Sjb	if (lf->fbt_nentries) {
441179237Sjb		/*
442179237Sjb		 * This module has some FBT entries allocated; we're afraid
443179237Sjb		 * to screw with it.
444179237Sjb		 */
445179237Sjb		return;
446179237Sjb	}
447179237Sjb
448179237Sjb	/*
449179237Sjb	 * List the functions in the module and the symbol values.
450179237Sjb	 */
451179237Sjb	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
452179237Sjb}
453179237Sjb
454179237Sjbstatic void
455179237Sjbfbt_destroy(void *arg, dtrace_id_t id, void *parg)
456179237Sjb{
457179237Sjb	fbt_probe_t *fbt = parg, *next, *hash, *last;
458179237Sjb	modctl_t *ctl;
459179237Sjb	int ndx;
460179237Sjb
461179237Sjb	do {
462179237Sjb		ctl = fbt->fbtp_ctl;
463179237Sjb
464179237Sjb		ctl->fbt_nentries--;
465179237Sjb
466179237Sjb		/*
467179237Sjb		 * Now we need to remove this probe from the fbt_probetab.
468179237Sjb		 */
469179237Sjb		ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
470179237Sjb		last = NULL;
471179237Sjb		hash = fbt_probetab[ndx];
472179237Sjb
473179237Sjb		while (hash != fbt) {
474179237Sjb			ASSERT(hash != NULL);
475179237Sjb			last = hash;
476179237Sjb			hash = hash->fbtp_hashnext;
477179237Sjb		}
478179237Sjb
479179237Sjb		if (last != NULL) {
480179237Sjb			last->fbtp_hashnext = fbt->fbtp_hashnext;
481179237Sjb		} else {
482179237Sjb			fbt_probetab[ndx] = fbt->fbtp_hashnext;
483179237Sjb		}
484179237Sjb
485179237Sjb		next = fbt->fbtp_next;
486179237Sjb		free(fbt, M_FBT);
487179237Sjb
488179237Sjb		fbt = next;
489179237Sjb	} while (fbt != NULL);
490179237Sjb}
491179237Sjb
492179237Sjbstatic void
493179237Sjbfbt_enable(void *arg, dtrace_id_t id, void *parg)
494179237Sjb{
495179237Sjb	fbt_probe_t *fbt = parg;
496179237Sjb	modctl_t *ctl = fbt->fbtp_ctl;
497179237Sjb
498179237Sjb	ctl->nenabled++;
499179237Sjb
500179237Sjb	/*
501179237Sjb	 * Now check that our modctl has the expected load count.  If it
502179237Sjb	 * doesn't, this module must have been unloaded and reloaded -- and
503179237Sjb	 * we're not going to touch it.
504179237Sjb	 */
505179237Sjb	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
506179237Sjb		if (fbt_verbose) {
507179237Sjb			printf("fbt is failing for probe %s "
508179237Sjb			    "(module %s reloaded)",
509179237Sjb			    fbt->fbtp_name, ctl->filename);
510179237Sjb		}
511179237Sjb
512179237Sjb		return;
513179237Sjb	}
514179237Sjb
515179237Sjb	for (; fbt != NULL; fbt = fbt->fbtp_next) {
516179237Sjb		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
517179237Sjb	}
518179237Sjb}
519179237Sjb
520179237Sjbstatic void
521179237Sjbfbt_disable(void *arg, dtrace_id_t id, void *parg)
522179237Sjb{
523179237Sjb	fbt_probe_t *fbt = parg;
524179237Sjb	modctl_t *ctl = fbt->fbtp_ctl;
525179237Sjb
526179237Sjb	ASSERT(ctl->nenabled > 0);
527179237Sjb	ctl->nenabled--;
528179237Sjb
529179237Sjb	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
530179237Sjb		return;
531179237Sjb
532179237Sjb	for (; fbt != NULL; fbt = fbt->fbtp_next)
533179237Sjb		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
534179237Sjb}
535179237Sjb
536179237Sjbstatic void
537179237Sjbfbt_suspend(void *arg, dtrace_id_t id, void *parg)
538179237Sjb{
539179237Sjb	fbt_probe_t *fbt = parg;
540179237Sjb	modctl_t *ctl = fbt->fbtp_ctl;
541179237Sjb
542179237Sjb	ASSERT(ctl->nenabled > 0);
543179237Sjb
544179237Sjb	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
545179237Sjb		return;
546179237Sjb
547179237Sjb	for (; fbt != NULL; fbt = fbt->fbtp_next)
548179237Sjb		*fbt->fbtp_patchpoint = fbt->fbtp_savedval;
549179237Sjb}
550179237Sjb
551179237Sjbstatic void
552179237Sjbfbt_resume(void *arg, dtrace_id_t id, void *parg)
553179237Sjb{
554179237Sjb	fbt_probe_t *fbt = parg;
555179237Sjb	modctl_t *ctl = fbt->fbtp_ctl;
556179237Sjb
557179237Sjb	ASSERT(ctl->nenabled > 0);
558179237Sjb
559179237Sjb	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
560179237Sjb		return;
561179237Sjb
562179237Sjb	for (; fbt != NULL; fbt = fbt->fbtp_next)
563179237Sjb		*fbt->fbtp_patchpoint = fbt->fbtp_patchval;
564179237Sjb}
565179237Sjb
566179237Sjbstatic int
567179237Sjbfbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
568179237Sjb{
569179237Sjb	const Elf_Sym *symp = lc->symtab;;
570179237Sjb	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
571179237Sjb	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
572179237Sjb	int i;
573179237Sjb	uint32_t *ctfoff;
574179237Sjb	uint32_t objtoff = hp->cth_objtoff;
575179237Sjb	uint32_t funcoff = hp->cth_funcoff;
576179237Sjb	ushort_t info;
577179237Sjb	ushort_t vlen;
578179237Sjb
579179237Sjb	/* Sanity check. */
580179237Sjb	if (hp->cth_magic != CTF_MAGIC) {
581179237Sjb		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
582179237Sjb		return (EINVAL);
583179237Sjb	}
584179237Sjb
585179237Sjb	if (lc->symtab == NULL) {
586179237Sjb		printf("No symbol table in '%s'\n",lf->pathname);
587179237Sjb		return (EINVAL);
588179237Sjb	}
589179237Sjb
590179237Sjb	if ((ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK)) == NULL)
591179237Sjb		return (ENOMEM);
592179237Sjb
593179237Sjb	*lc->ctfoffp = ctfoff;
594179237Sjb
595179237Sjb	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
596179237Sjb		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
597179237Sjb			*ctfoff = 0xffffffff;
598179237Sjb			continue;
599179237Sjb		}
600179237Sjb
601179237Sjb		switch (ELF_ST_TYPE(symp->st_info)) {
602179237Sjb		case STT_OBJECT:
603179237Sjb			if (objtoff >= hp->cth_funcoff ||
604179237Sjb                            (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
605179237Sjb				*ctfoff = 0xffffffff;
606179237Sjb                                break;
607179237Sjb                        }
608179237Sjb
609179237Sjb                        *ctfoff = objtoff;
610179237Sjb                        objtoff += sizeof (ushort_t);
611179237Sjb			break;
612179237Sjb
613179237Sjb		case STT_FUNC:
614179237Sjb			if (funcoff >= hp->cth_typeoff) {
615179237Sjb				*ctfoff = 0xffffffff;
616179237Sjb				break;
617179237Sjb			}
618179237Sjb
619179237Sjb			*ctfoff = funcoff;
620179237Sjb
621179237Sjb			info = *((const ushort_t *)(ctfdata + funcoff));
622179237Sjb			vlen = CTF_INFO_VLEN(info);
623179237Sjb
624179237Sjb			/*
625179237Sjb			 * If we encounter a zero pad at the end, just skip it.
626179237Sjb			 * Otherwise skip over the function and its return type
627179237Sjb			 * (+2) and the argument list (vlen).
628179237Sjb			 */
629179237Sjb			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
630179237Sjb				funcoff += sizeof (ushort_t); /* skip pad */
631179237Sjb			else
632179237Sjb				funcoff += sizeof (ushort_t) * (vlen + 2);
633179237Sjb			break;
634179237Sjb
635179237Sjb		default:
636179237Sjb			*ctfoff = 0xffffffff;
637179237Sjb			break;
638179237Sjb		}
639179237Sjb	}
640179237Sjb
641179237Sjb	return (0);
642179237Sjb}
643179237Sjb
644179237Sjbstatic ssize_t
645179237Sjbfbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
646179237Sjb    ssize_t *incrementp)
647179237Sjb{
648179237Sjb	ssize_t size, increment;
649179237Sjb
650179237Sjb	if (version > CTF_VERSION_1 &&
651179237Sjb	    tp->ctt_size == CTF_LSIZE_SENT) {
652179237Sjb		size = CTF_TYPE_LSIZE(tp);
653179237Sjb		increment = sizeof (ctf_type_t);
654179237Sjb	} else {
655179237Sjb		size = tp->ctt_size;
656179237Sjb		increment = sizeof (ctf_stype_t);
657179237Sjb	}
658179237Sjb
659179237Sjb	if (sizep)
660179237Sjb		*sizep = size;
661179237Sjb	if (incrementp)
662179237Sjb		*incrementp = increment;
663179237Sjb
664179237Sjb	return (size);
665179237Sjb}
666179237Sjb
667179237Sjbstatic int
668179237Sjbfbt_typoff_init(linker_ctf_t *lc)
669179237Sjb{
670179237Sjb	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
671179237Sjb	const ctf_type_t *tbuf;
672179237Sjb	const ctf_type_t *tend;
673179237Sjb	const ctf_type_t *tp;
674179237Sjb	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
675179237Sjb	int ctf_typemax = 0;
676179237Sjb	uint32_t *xp;
677179237Sjb	ulong_t pop[CTF_K_MAX + 1] = { 0 };
678179237Sjb
679179237Sjb
680179237Sjb	/* Sanity check. */
681179237Sjb	if (hp->cth_magic != CTF_MAGIC)
682179237Sjb		return (EINVAL);
683179237Sjb
684179237Sjb	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
685179237Sjb	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
686179237Sjb
687179237Sjb	int child = hp->cth_parname != 0;
688179237Sjb
689179237Sjb	/*
690179237Sjb	 * We make two passes through the entire type section.  In this first
691179237Sjb	 * pass, we count the number of each type and the total number of types.
692179237Sjb	 */
693179237Sjb	for (tp = tbuf; tp < tend; ctf_typemax++) {
694179237Sjb		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
695179237Sjb		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
696179237Sjb		ssize_t size, increment;
697179237Sjb
698179237Sjb		size_t vbytes;
699179237Sjb		uint_t n;
700179237Sjb
701179237Sjb		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
702179237Sjb
703179237Sjb		switch (kind) {
704179237Sjb		case CTF_K_INTEGER:
705179237Sjb		case CTF_K_FLOAT:
706179237Sjb			vbytes = sizeof (uint_t);
707179237Sjb			break;
708179237Sjb		case CTF_K_ARRAY:
709179237Sjb			vbytes = sizeof (ctf_array_t);
710179237Sjb			break;
711179237Sjb		case CTF_K_FUNCTION:
712179237Sjb			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
713179237Sjb			break;
714179237Sjb		case CTF_K_STRUCT:
715179237Sjb		case CTF_K_UNION:
716179237Sjb			if (size < CTF_LSTRUCT_THRESH) {
717179237Sjb				ctf_member_t *mp = (ctf_member_t *)
718179237Sjb				    ((uintptr_t)tp + increment);
719179237Sjb
720179237Sjb				vbytes = sizeof (ctf_member_t) * vlen;
721179237Sjb				for (n = vlen; n != 0; n--, mp++)
722179237Sjb					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
723179237Sjb			} else {
724179237Sjb				ctf_lmember_t *lmp = (ctf_lmember_t *)
725179237Sjb				    ((uintptr_t)tp + increment);
726179237Sjb
727179237Sjb				vbytes = sizeof (ctf_lmember_t) * vlen;
728179237Sjb				for (n = vlen; n != 0; n--, lmp++)
729179237Sjb					child |=
730179237Sjb					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
731179237Sjb			}
732179237Sjb			break;
733179237Sjb		case CTF_K_ENUM:
734179237Sjb			vbytes = sizeof (ctf_enum_t) * vlen;
735179237Sjb			break;
736179237Sjb		case CTF_K_FORWARD:
737179237Sjb			/*
738179237Sjb			 * For forward declarations, ctt_type is the CTF_K_*
739179237Sjb			 * kind for the tag, so bump that population count too.
740179237Sjb			 * If ctt_type is unknown, treat the tag as a struct.
741179237Sjb			 */
742179237Sjb			if (tp->ctt_type == CTF_K_UNKNOWN ||
743179237Sjb			    tp->ctt_type >= CTF_K_MAX)
744179237Sjb				pop[CTF_K_STRUCT]++;
745179237Sjb			else
746179237Sjb				pop[tp->ctt_type]++;
747179237Sjb			/*FALLTHRU*/
748179237Sjb		case CTF_K_UNKNOWN:
749179237Sjb			vbytes = 0;
750179237Sjb			break;
751179237Sjb		case CTF_K_POINTER:
752179237Sjb		case CTF_K_TYPEDEF:
753179237Sjb		case CTF_K_VOLATILE:
754179237Sjb		case CTF_K_CONST:
755179237Sjb		case CTF_K_RESTRICT:
756179237Sjb			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
757179237Sjb			vbytes = 0;
758179237Sjb			break;
759179237Sjb		default:
760179237Sjb			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
761179237Sjb			return (EIO);
762179237Sjb		}
763179237Sjb		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
764179237Sjb		pop[kind]++;
765179237Sjb	}
766179237Sjb
767248640Savg	/* account for a sentinel value below */
768248640Savg	ctf_typemax++;
769179237Sjb	*lc->typlenp = ctf_typemax;
770179237Sjb
771179237Sjb	if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER, M_ZERO | M_WAITOK)) == NULL)
772179237Sjb		return (ENOMEM);
773179237Sjb
774179237Sjb	*lc->typoffp = xp;
775179237Sjb
776179237Sjb	/* type id 0 is used as a sentinel value */
777179237Sjb	*xp++ = 0;
778179237Sjb
779179237Sjb	/*
780179237Sjb	 * In the second pass, fill in the type offset.
781179237Sjb	 */
782179237Sjb	for (tp = tbuf; tp < tend; xp++) {
783179237Sjb		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
784179237Sjb		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
785179237Sjb		ssize_t size, increment;
786179237Sjb
787179237Sjb		size_t vbytes;
788179237Sjb		uint_t n;
789179237Sjb
790179237Sjb		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
791179237Sjb
792179237Sjb		switch (kind) {
793179237Sjb		case CTF_K_INTEGER:
794179237Sjb		case CTF_K_FLOAT:
795179237Sjb			vbytes = sizeof (uint_t);
796179237Sjb			break;
797179237Sjb		case CTF_K_ARRAY:
798179237Sjb			vbytes = sizeof (ctf_array_t);
799179237Sjb			break;
800179237Sjb		case CTF_K_FUNCTION:
801179237Sjb			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
802179237Sjb			break;
803179237Sjb		case CTF_K_STRUCT:
804179237Sjb		case CTF_K_UNION:
805179237Sjb			if (size < CTF_LSTRUCT_THRESH) {
806179237Sjb				ctf_member_t *mp = (ctf_member_t *)
807179237Sjb				    ((uintptr_t)tp + increment);
808179237Sjb
809179237Sjb				vbytes = sizeof (ctf_member_t) * vlen;
810179237Sjb				for (n = vlen; n != 0; n--, mp++)
811179237Sjb					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
812179237Sjb			} else {
813179237Sjb				ctf_lmember_t *lmp = (ctf_lmember_t *)
814179237Sjb				    ((uintptr_t)tp + increment);
815179237Sjb
816179237Sjb				vbytes = sizeof (ctf_lmember_t) * vlen;
817179237Sjb				for (n = vlen; n != 0; n--, lmp++)
818179237Sjb					child |=
819179237Sjb					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
820179237Sjb			}
821179237Sjb			break;
822179237Sjb		case CTF_K_ENUM:
823179237Sjb			vbytes = sizeof (ctf_enum_t) * vlen;
824179237Sjb			break;
825179237Sjb		case CTF_K_FORWARD:
826179237Sjb		case CTF_K_UNKNOWN:
827179237Sjb			vbytes = 0;
828179237Sjb			break;
829179237Sjb		case CTF_K_POINTER:
830179237Sjb		case CTF_K_TYPEDEF:
831179237Sjb		case CTF_K_VOLATILE:
832179237Sjb		case CTF_K_CONST:
833179237Sjb		case CTF_K_RESTRICT:
834179237Sjb			vbytes = 0;
835179237Sjb			break;
836179237Sjb		default:
837179237Sjb			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
838179237Sjb			return (EIO);
839179237Sjb		}
840179237Sjb		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
841179237Sjb		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
842179237Sjb	}
843179237Sjb
844179237Sjb	return (0);
845179237Sjb}
846179237Sjb
847179237Sjb/*
848179237Sjb * CTF Declaration Stack
849179237Sjb *
850179237Sjb * In order to implement ctf_type_name(), we must convert a type graph back
851179237Sjb * into a C type declaration.  Unfortunately, a type graph represents a storage
852179237Sjb * class ordering of the type whereas a type declaration must obey the C rules
853179237Sjb * for operator precedence, and the two orderings are frequently in conflict.
854179237Sjb * For example, consider these CTF type graphs and their C declarations:
855179237Sjb *
856179237Sjb * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
857179237Sjb * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
858179237Sjb *
859179237Sjb * In each case, parentheses are used to raise operator * to higher lexical
860179237Sjb * precedence, so the string form of the C declaration cannot be constructed by
861179237Sjb * walking the type graph links and forming the string from left to right.
862179237Sjb *
863179237Sjb * The functions in this file build a set of stacks from the type graph nodes
864179237Sjb * corresponding to the C operator precedence levels in the appropriate order.
865179237Sjb * The code in ctf_type_name() can then iterate over the levels and nodes in
866179237Sjb * lexical precedence order and construct the final C declaration string.
867179237Sjb */
/*
 * Link fields of a doubly-linked list.  The structure is used in two roles:
 * embedded in an element it records that element's neighbours, and on its own
 * as a list header it records the tail (l_prev) and the head (l_next).
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;
872179237Sjb
/*
 * Read the l_prev / l_next link of a list header, or of an element that
 * starts with an embedded ctf_list_t.  The argument is cast to ctf_list_t *
 * and the result to void *, so it can be assigned to an element pointer
 * without a further cast.
 */
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
875179237Sjb
/*
 * Precedence levels of a C declaration, listed in the order in which
 * fbt_type_name() emits them.  Each level owns one node stack in ctf_decl_t.
 */
typedef enum {
	CTF_PREC_BASE,		/* base types, named typedefs, their qualifiers */
	CTF_PREC_POINTER,	/* pointer declarators */
	CTF_PREC_ARRAY,		/* array declarators */
	CTF_PREC_FUNCTION,	/* function declarators */
	CTF_PREC_MAX		/* number of levels; sizes the per-level arrays */
} ctf_decl_prec_t;
883179237Sjb
/*
 * One entry on a declaration stack: a single type graph node recorded by
 * ctf_decl_push() and released by ctf_decl_fini().
 */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers (must be first) */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind (CTF_K_*) */
	uint_t cd_n;				/* element count if array, otherwise 1 */
} ctf_decl_node_t;
890179237Sjb
/*
 * Working state for turning one type graph into a C declaration string: the
 * per-precedence node stacks plus the output buffer bookkeeping.
 */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* precedence level qualifiers attach to */
	ctf_decl_prec_t cd_ordp;		/* next storage order value to assign */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* current write position in buffer */
	char *cd_end;				/* buffer limit (one past the end) */
	size_t cd_len;				/* buffer space required, even if truncated */
	int cd_err;				/* saved error value (0 if none) */
} ctf_decl_t;
902179237Sjb
903179237Sjb/*
904179237Sjb * Simple doubly-linked list append routine.  This implementation assumes that
905179237Sjb * each list element contains an embedded ctf_list_t as the first member.
906179237Sjb * An additional ctf_list_t is used to store the head (l_next) and tail
907179237Sjb * (l_prev) pointers.  The current head and tail list elements have their
908179237Sjb * previous and next pointers set to NULL, respectively.
909179237Sjb */
910179237Sjbstatic void
911179237Sjbctf_list_append(ctf_list_t *lp, void *new)
912179237Sjb{
913179237Sjb	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
914179237Sjb	ctf_list_t *q = new;		/* q = new list element */
915179237Sjb
916179237Sjb	lp->l_prev = q;
917179237Sjb	q->l_prev = p;
918179237Sjb	q->l_next = NULL;
919179237Sjb
920179237Sjb	if (p != NULL)
921179237Sjb		p->l_next = q;
922179237Sjb	else
923179237Sjb		lp->l_next = q;
924179237Sjb}
925179237Sjb
926179237Sjb/*
927179237Sjb * Prepend the specified existing element to the given ctf_list_t.  The
928179237Sjb * existing pointer should be pointing at a struct with embedded ctf_list_t.
929179237Sjb */
930179237Sjbstatic void
931179237Sjbctf_list_prepend(ctf_list_t *lp, void *new)
932179237Sjb{
933179237Sjb	ctf_list_t *p = new;		/* p = new list element */
934179237Sjb	ctf_list_t *q = lp->l_next;	/* q = head list element */
935179237Sjb
936179237Sjb	lp->l_next = p;
937179237Sjb	p->l_prev = NULL;
938179237Sjb	p->l_next = q;
939179237Sjb
940179237Sjb	if (q != NULL)
941179237Sjb		q->l_prev = p;
942179237Sjb	else
943179237Sjb		lp->l_prev = p;
944179237Sjb}
945179237Sjb
946179237Sjbstatic void
947179237Sjbctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
948179237Sjb{
949179237Sjb	int i;
950179237Sjb
951179237Sjb	bzero(cd, sizeof (ctf_decl_t));
952179237Sjb
953179237Sjb	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
954179237Sjb		cd->cd_order[i] = CTF_PREC_BASE - 1;
955179237Sjb
956179237Sjb	cd->cd_qualp = CTF_PREC_BASE;
957179237Sjb	cd->cd_ordp = CTF_PREC_BASE;
958179237Sjb
959179237Sjb	cd->cd_buf = buf;
960179237Sjb	cd->cd_ptr = buf;
961179237Sjb	cd->cd_end = buf + len;
962179237Sjb}
963179237Sjb
964179237Sjbstatic void
965179237Sjbctf_decl_fini(ctf_decl_t *cd)
966179237Sjb{
967179237Sjb	ctf_decl_node_t *cdp, *ndp;
968179237Sjb	int i;
969179237Sjb
970179237Sjb	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
971179237Sjb		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
972179237Sjb		    cdp != NULL; cdp = ndp) {
973179237Sjb			ndp = ctf_list_next(cdp);
974179237Sjb			free(cdp, M_FBT);
975179237Sjb		}
976179237Sjb	}
977179237Sjb}
978179237Sjb
979179237Sjbstatic const ctf_type_t *
980179237Sjbctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
981179237Sjb{
982179237Sjb	const ctf_type_t *tp;
983179237Sjb	uint32_t offset;
984179237Sjb	uint32_t *typoff = *lc->typoffp;
985179237Sjb
986179237Sjb	if (type >= *lc->typlenp) {
987179237Sjb		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
988179237Sjb		return(NULL);
989179237Sjb	}
990179237Sjb
991179237Sjb	/* Check if the type isn't cross-referenced. */
992179237Sjb	if ((offset = typoff[type]) == 0) {
993179237Sjb		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
994179237Sjb		return(NULL);
995179237Sjb	}
996179237Sjb
997179237Sjb	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
998179237Sjb
999179237Sjb	return (tp);
1000179237Sjb}
1001179237Sjb
1002179237Sjbstatic void
1003179237Sjbfbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
1004179237Sjb{
1005179237Sjb	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
1006179237Sjb	const ctf_type_t *tp;
1007179237Sjb	const ctf_array_t *ap;
1008179237Sjb	ssize_t increment;
1009179237Sjb
1010179237Sjb	bzero(arp, sizeof(*arp));
1011179237Sjb
1012179237Sjb	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
1013179237Sjb		return;
1014179237Sjb
1015179237Sjb	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
1016179237Sjb		return;
1017179237Sjb
1018179237Sjb	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
1019179237Sjb
1020179237Sjb	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
1021179237Sjb	arp->ctr_contents = ap->cta_contents;
1022179237Sjb	arp->ctr_index = ap->cta_index;
1023179237Sjb	arp->ctr_nelems = ap->cta_nelems;
1024179237Sjb}
1025179237Sjb
1026179237Sjbstatic const char *
1027179237Sjbctf_strptr(linker_ctf_t *lc, int name)
1028179237Sjb{
1029179237Sjb	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
1030179237Sjb	const char *strp = "";
1031179237Sjb
1032179237Sjb	if (name < 0 || name >= hp->cth_strlen)
1033179237Sjb		return(strp);
1034179237Sjb
1035179237Sjb	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
1036179237Sjb
1037179237Sjb	return (strp);
1038179237Sjb}
1039179237Sjb
1040179237Sjbstatic void
1041179237Sjbctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
1042179237Sjb{
1043179237Sjb	ctf_decl_node_t *cdp;
1044179237Sjb	ctf_decl_prec_t prec;
1045179237Sjb	uint_t kind, n = 1;
1046179237Sjb	int is_qual = 0;
1047179237Sjb
1048179237Sjb	const ctf_type_t *tp;
1049179237Sjb	ctf_arinfo_t ar;
1050179237Sjb
1051179237Sjb	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
1052179237Sjb		cd->cd_err = ENOENT;
1053179237Sjb		return;
1054179237Sjb	}
1055179237Sjb
1056179237Sjb	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
1057179237Sjb	case CTF_K_ARRAY:
1058179237Sjb		fbt_array_info(lc, type, &ar);
1059179237Sjb		ctf_decl_push(cd, lc, ar.ctr_contents);
1060179237Sjb		n = ar.ctr_nelems;
1061179237Sjb		prec = CTF_PREC_ARRAY;
1062179237Sjb		break;
1063179237Sjb
1064179237Sjb	case CTF_K_TYPEDEF:
1065179237Sjb		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
1066179237Sjb			ctf_decl_push(cd, lc, tp->ctt_type);
1067179237Sjb			return;
1068179237Sjb		}
1069179237Sjb		prec = CTF_PREC_BASE;
1070179237Sjb		break;
1071179237Sjb
1072179237Sjb	case CTF_K_FUNCTION:
1073179237Sjb		ctf_decl_push(cd, lc, tp->ctt_type);
1074179237Sjb		prec = CTF_PREC_FUNCTION;
1075179237Sjb		break;
1076179237Sjb
1077179237Sjb	case CTF_K_POINTER:
1078179237Sjb		ctf_decl_push(cd, lc, tp->ctt_type);
1079179237Sjb		prec = CTF_PREC_POINTER;
1080179237Sjb		break;
1081179237Sjb
1082179237Sjb	case CTF_K_VOLATILE:
1083179237Sjb	case CTF_K_CONST:
1084179237Sjb	case CTF_K_RESTRICT:
1085179237Sjb		ctf_decl_push(cd, lc, tp->ctt_type);
1086179237Sjb		prec = cd->cd_qualp;
1087179237Sjb		is_qual++;
1088179237Sjb		break;
1089179237Sjb
1090179237Sjb	default:
1091179237Sjb		prec = CTF_PREC_BASE;
1092179237Sjb	}
1093179237Sjb
1094179237Sjb	if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) {
1095179237Sjb		cd->cd_err = EAGAIN;
1096179237Sjb		return;
1097179237Sjb	}
1098179237Sjb
1099179237Sjb	cdp->cd_type = type;
1100179237Sjb	cdp->cd_kind = kind;
1101179237Sjb	cdp->cd_n = n;
1102179237Sjb
1103179237Sjb	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
1104179237Sjb		cd->cd_order[prec] = cd->cd_ordp++;
1105179237Sjb
1106179237Sjb	/*
1107179237Sjb	 * Reset cd_qualp to the highest precedence level that we've seen so
1108179237Sjb	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
1109179237Sjb	 */
1110179237Sjb	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
1111179237Sjb		cd->cd_qualp = prec;
1112179237Sjb
1113179237Sjb	/*
1114179237Sjb	 * C array declarators are ordered inside out so prepend them.  Also by
1115179237Sjb	 * convention qualifiers of base types precede the type specifier (e.g.
1116179237Sjb	 * const int vs. int const) even though the two forms are equivalent.
1117179237Sjb	 */
1118179237Sjb	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
1119179237Sjb		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
1120179237Sjb	else
1121179237Sjb		ctf_list_append(&cd->cd_nodes[prec], cdp);
1122179237Sjb}
1123179237Sjb
1124179237Sjbstatic void
1125179237Sjbctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
1126179237Sjb{
1127179237Sjb	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
1128179237Sjb	va_list ap;
1129179237Sjb	size_t n;
1130179237Sjb
1131179237Sjb	va_start(ap, format);
1132179237Sjb	n = vsnprintf(cd->cd_ptr, len, format, ap);
1133179237Sjb	va_end(ap);
1134179237Sjb
1135179237Sjb	cd->cd_ptr += MIN(n, len);
1136179237Sjb	cd->cd_len += n;
1137179237Sjb}
1138179237Sjb
1139179237Sjbstatic ssize_t
1140179237Sjbfbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
1141179237Sjb{
1142179237Sjb	ctf_decl_t cd;
1143179237Sjb	ctf_decl_node_t *cdp;
1144179237Sjb	ctf_decl_prec_t prec, lp, rp;
1145179237Sjb	int ptr, arr;
1146179237Sjb	uint_t k;
1147179237Sjb
1148179237Sjb	if (lc == NULL && type == CTF_ERR)
1149179237Sjb		return (-1); /* simplify caller code by permitting CTF_ERR */
1150179237Sjb
1151179237Sjb	ctf_decl_init(&cd, buf, len);
1152179237Sjb	ctf_decl_push(&cd, lc, type);
1153179237Sjb
1154179237Sjb	if (cd.cd_err != 0) {
1155179237Sjb		ctf_decl_fini(&cd);
1156179237Sjb		return (-1);
1157179237Sjb	}
1158179237Sjb
1159179237Sjb	/*
1160179237Sjb	 * If the type graph's order conflicts with lexical precedence order
1161179237Sjb	 * for pointers or arrays, then we need to surround the declarations at
1162179237Sjb	 * the corresponding lexical precedence with parentheses.  This can
1163179237Sjb	 * result in either a parenthesized pointer (*) as in int (*)() or
1164179237Sjb	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
1165179237Sjb	 */
1166179237Sjb	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
1167179237Sjb	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
1168179237Sjb
1169179237Sjb	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
1170179237Sjb	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
1171179237Sjb
1172179237Sjb	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
1173179237Sjb
1174179237Sjb	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
1175179237Sjb		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
1176179237Sjb		    cdp != NULL; cdp = ctf_list_next(cdp)) {
1177179237Sjb
1178179237Sjb			const ctf_type_t *tp =
1179179237Sjb			    ctf_lookup_by_id(lc, cdp->cd_type);
1180179237Sjb			const char *name = ctf_strptr(lc, tp->ctt_name);
1181179237Sjb
1182179237Sjb			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
1183179237Sjb				ctf_decl_sprintf(&cd, " ");
1184179237Sjb
1185179237Sjb			if (lp == prec) {
1186179237Sjb				ctf_decl_sprintf(&cd, "(");
1187179237Sjb				lp = -1;
1188179237Sjb			}
1189179237Sjb
1190179237Sjb			switch (cdp->cd_kind) {
1191179237Sjb			case CTF_K_INTEGER:
1192179237Sjb			case CTF_K_FLOAT:
1193179237Sjb			case CTF_K_TYPEDEF:
1194179237Sjb				ctf_decl_sprintf(&cd, "%s", name);
1195179237Sjb				break;
1196179237Sjb			case CTF_K_POINTER:
1197179237Sjb				ctf_decl_sprintf(&cd, "*");
1198179237Sjb				break;
1199179237Sjb			case CTF_K_ARRAY:
1200179237Sjb				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
1201179237Sjb				break;
1202179237Sjb			case CTF_K_FUNCTION:
1203179237Sjb				ctf_decl_sprintf(&cd, "()");
1204179237Sjb				break;
1205179237Sjb			case CTF_K_STRUCT:
1206179237Sjb			case CTF_K_FORWARD:
1207179237Sjb				ctf_decl_sprintf(&cd, "struct %s", name);
1208179237Sjb				break;
1209179237Sjb			case CTF_K_UNION:
1210179237Sjb				ctf_decl_sprintf(&cd, "union %s", name);
1211179237Sjb				break;
1212179237Sjb			case CTF_K_ENUM:
1213179237Sjb				ctf_decl_sprintf(&cd, "enum %s", name);
1214179237Sjb				break;
1215179237Sjb			case CTF_K_VOLATILE:
1216179237Sjb				ctf_decl_sprintf(&cd, "volatile");
1217179237Sjb				break;
1218179237Sjb			case CTF_K_CONST:
1219179237Sjb				ctf_decl_sprintf(&cd, "const");
1220179237Sjb				break;
1221179237Sjb			case CTF_K_RESTRICT:
1222179237Sjb				ctf_decl_sprintf(&cd, "restrict");
1223179237Sjb				break;
1224179237Sjb			}
1225179237Sjb
1226179237Sjb			k = cdp->cd_kind;
1227179237Sjb		}
1228179237Sjb
1229179237Sjb		if (rp == prec)
1230179237Sjb			ctf_decl_sprintf(&cd, ")");
1231179237Sjb	}
1232179237Sjb
1233179237Sjb	ctf_decl_fini(&cd);
1234179237Sjb	return (cd.cd_len);
1235179237Sjb}
1236179237Sjb
1237179237Sjbstatic void
1238179237Sjbfbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1239179237Sjb{
1240179237Sjb	const ushort_t *dp;
1241179237Sjb	fbt_probe_t *fbt = parg;
1242179237Sjb	linker_ctf_t lc;
1243179237Sjb	modctl_t *ctl = fbt->fbtp_ctl;
1244179237Sjb	int ndx = desc->dtargd_ndx;
1245179237Sjb	int symindx = fbt->fbtp_symindx;
1246179237Sjb	uint32_t *ctfoff;
1247179237Sjb	uint32_t offset;
1248179237Sjb	ushort_t info, kind, n;
1249179237Sjb
1250248642Savg	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1251248642Savg		(void) strcpy(desc->dtargd_native, "int");
1252248642Savg		return;
1253248642Savg	}
1254248642Savg
1255179237Sjb	desc->dtargd_ndx = DTRACE_ARGNONE;
1256179237Sjb
1257179237Sjb	/* Get a pointer to the CTF data and it's length. */
1258179237Sjb	if (linker_ctf_get(ctl, &lc) != 0)
1259179237Sjb		/* No CTF data? Something wrong? *shrug* */
1260179237Sjb		return;
1261179237Sjb
1262179237Sjb	/* Check if this module hasn't been initialised yet. */
1263179237Sjb	if (*lc.ctfoffp == NULL) {
1264179237Sjb		/*
1265179237Sjb		 * Initialise the CTF object and function symindx to
1266179237Sjb		 * byte offset array.
1267179237Sjb		 */
1268179237Sjb		if (fbt_ctfoff_init(ctl, &lc) != 0)
1269179237Sjb			return;
1270179237Sjb
1271179237Sjb		/* Initialise the CTF type to byte offset array. */
1272179237Sjb		if (fbt_typoff_init(&lc) != 0)
1273179237Sjb			return;
1274179237Sjb	}
1275179237Sjb
1276179237Sjb	ctfoff = *lc.ctfoffp;
1277179237Sjb
1278179237Sjb	if (ctfoff == NULL || *lc.typoffp == NULL)
1279179237Sjb		return;
1280179237Sjb
1281179237Sjb	/* Check if the symbol index is out of range. */
1282179237Sjb	if (symindx >= lc.nsym)
1283179237Sjb		return;
1284179237Sjb
1285179237Sjb	/* Check if the symbol isn't cross-referenced. */
1286179237Sjb	if ((offset = ctfoff[symindx]) == 0xffffffff)
1287179237Sjb		return;
1288179237Sjb
1289179237Sjb	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1290179237Sjb
1291179237Sjb	info = *dp++;
1292179237Sjb	kind = CTF_INFO_KIND(info);
1293179237Sjb	n = CTF_INFO_VLEN(info);
1294179237Sjb
1295179237Sjb	if (kind == CTF_K_UNKNOWN && n == 0) {
1296179237Sjb		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1297179237Sjb		return;
1298179237Sjb	}
1299179237Sjb
1300179237Sjb	if (kind != CTF_K_FUNCTION) {
1301179237Sjb		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1302179237Sjb		return;
1303179237Sjb	}
1304179237Sjb
1305248642Savg	if (fbt->fbtp_roffset != 0) {
1306248642Savg		/* Only return type is available for args[1] in return probe. */
1307248642Savg		if (ndx > 1)
1308248642Savg			return;
1309248642Savg		ASSERT(ndx == 1);
1310248642Savg	} else {
1311248642Savg		/* Check if the requested argument doesn't exist. */
1312248642Savg		if (ndx >= n)
1313248642Savg			return;
1314179237Sjb
1315248642Savg		/* Skip the return type and arguments up to the one requested. */
1316248642Savg		dp += ndx + 1;
1317248642Savg	}
1318179237Sjb
1319179237Sjb	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1320179237Sjb		desc->dtargd_ndx = ndx;
1321179237Sjb
1322179237Sjb	return;
1323179237Sjb}
1324179237Sjb
1325254268Smarkjstatic int
1326254268Smarkjfbt_linker_file_cb(linker_file_t lf, void *arg)
1327254268Smarkj{
1328254268Smarkj
1329254268Smarkj	fbt_provide_module(arg, lf);
1330254268Smarkj
1331254268Smarkj	return (0);
1332254268Smarkj}
1333254268Smarkj
1334179237Sjbstatic void
1335179237Sjbfbt_load(void *dummy)
1336179237Sjb{
1337179237Sjb	/* Create the /dev/dtrace/fbt entry. */
1338179237Sjb	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1339179237Sjb	    "dtrace/fbt");
1340179237Sjb
1341179237Sjb	/* Default the probe table size if not specified. */
1342179237Sjb	if (fbt_probetab_size == 0)
1343179237Sjb		fbt_probetab_size = FBT_PROBETAB_SIZE;
1344179237Sjb
1345179237Sjb	/* Choose the hash mask for the probe table. */
1346179237Sjb	fbt_probetab_mask = fbt_probetab_size - 1;
1347179237Sjb
1348179237Sjb	/* Allocate memory for the probe table. */
1349179237Sjb	fbt_probetab =
1350179237Sjb	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1351179237Sjb
1352179237Sjb	dtrace_doubletrap_func = fbt_doubletrap;
1353179237Sjb	dtrace_invop_add(fbt_invop);
1354179237Sjb
1355179237Sjb	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1356179237Sjb	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1357179237Sjb		return;
1358254268Smarkj
1359254268Smarkj	/* Create probes for the kernel and already-loaded modules. */
1360254268Smarkj	linker_file_foreach(fbt_linker_file_cb, NULL);
1361179237Sjb}
1362179237Sjb
1363179237Sjbstatic int
1364179237Sjbfbt_unload()
1365179237Sjb{
1366179237Sjb	int error = 0;
1367179237Sjb
1368179237Sjb	/* De-register the invalid opcode handler. */
1369179237Sjb	dtrace_invop_remove(fbt_invop);
1370179237Sjb
1371179237Sjb	dtrace_doubletrap_func = NULL;
1372179237Sjb
1373179237Sjb	/* De-register this DTrace provider. */
1374179237Sjb	if ((error = dtrace_unregister(fbt_id)) != 0)
1375179237Sjb		return (error);
1376179237Sjb
1377179237Sjb	/* Free the probe table. */
1378179237Sjb	free(fbt_probetab, M_FBT);
1379179237Sjb	fbt_probetab = NULL;
1380179237Sjb	fbt_probetab_mask = 0;
1381179237Sjb
1382179237Sjb	destroy_dev(fbt_cdev);
1383179237Sjb
1384179237Sjb	return (error);
1385179237Sjb}
1386179237Sjb
1387179237Sjbstatic int
1388179237Sjbfbt_modevent(module_t mod __unused, int type, void *data __unused)
1389179237Sjb{
1390179237Sjb	int error = 0;
1391179237Sjb
1392179237Sjb	switch (type) {
1393179237Sjb	case MOD_LOAD:
1394179237Sjb		break;
1395179237Sjb
1396179237Sjb	case MOD_UNLOAD:
1397179237Sjb		break;
1398179237Sjb
1399179237Sjb	case MOD_SHUTDOWN:
1400179237Sjb		break;
1401179237Sjb
1402179237Sjb	default:
1403179237Sjb		error = EOPNOTSUPP;
1404179237Sjb		break;
1405179237Sjb
1406179237Sjb	}
1407179237Sjb
1408179237Sjb	return (error);
1409179237Sjb}
1410179237Sjb
1411179237Sjbstatic int
1412179237Sjbfbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1413179237Sjb{
1414179237Sjb	return (0);
1415179237Sjb}
1416179237Sjb
/*
 * Hook fbt_load() and fbt_unload() into system initialisation and teardown
 * at the SI_SUB_DTRACE_PROVIDER stage.
 */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/*
 * Declare the fbt module (events go to fbt_modevent), its version, and its
 * dependencies on the dtrace and opensolaris modules.
 */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1424