1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 *
21 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22 *
23 */
24
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/conf.h>
33#include <sys/cpuvar.h>
34#include <sys/endian.h>
35#include <sys/fcntl.h>
36#include <sys/filio.h>
37#include <sys/kdb.h>
38#include <sys/kernel.h>
39#include <sys/kmem.h>
40#include <sys/kthread.h>
41#include <sys/limits.h>
42#include <sys/linker.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/module.h>
46#include <sys/mutex.h>
47#include <sys/pcpu.h>
48#include <sys/poll.h>
49#include <sys/proc.h>
50#include <sys/selinfo.h>
51#include <sys/smp.h>
52#include <sys/syscall.h>
53#include <sys/sysent.h>
54#include <sys/sysproto.h>
55#include <sys/uio.h>
56#include <sys/unistd.h>
57#include <machine/stdarg.h>
58
59#include <sys/dtrace.h>
60#include <sys/dtrace_bsd.h>
61
62#include "fbt.h"
63
/*
 * Malloc type for this provider.  In this file it tags the probe structures
 * released by fbt_destroy_one() and the declaration nodes allocated by
 * ctf_decl_push().
 */
MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/*
 * Provider identifier.
 * NOTE(review): presumably assigned when the provider registers with
 * DTrace; the registration code is not visible here -- confirm.
 */
dtrace_provider_id_t	fbt_id;
/*
 * Probe hash table.  Buckets are selected with
 * FBT_ADDR2NDX(fbt->fbtp_patchpoint) and chained through fbtp_hashnext.
 */
fbt_probe_t		**fbt_probetab;
/*
 * NOTE(review): presumably the index mask consumed by FBT_ADDR2NDX() --
 * confirm against fbt.h.
 */
int			fbt_probetab_mask;
69
/*
 * Forward declarations for the provider's entry points, so that the
 * fbt_pops operations vector can reference them before their definitions.
 */
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);
79
/*
 * Stability and dependency-class attributes advertised for this provider.
 * Each row is a (name stability, data stability, dependency class) triple.
 * NOTE(review): dtrace_pattr_t conventionally orders its rows as provider,
 * module, function, name, arguments -- confirm against <sys/dtrace.h>.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
87
/*
 * Provider operations vector.  The hooks left NULL (dtps_provide,
 * dtps_getargval, dtps_usermode) have no implementation in this file;
 * probes are created per module through fbt_provide_module().
 */
static dtrace_pops_t fbt_pops = {
	.dtps_provide =		NULL,
	.dtps_provide_module =	fbt_provide_module,
	.dtps_enable =		fbt_enable,
	.dtps_disable =		fbt_disable,
	.dtps_suspend =		fbt_suspend,
	.dtps_resume =		fbt_resume,
	.dtps_getargdesc =	fbt_getargdesc,
	.dtps_getargval =	NULL,
	.dtps_usermode =	NULL,
	.dtps_destroy =		fbt_destroy
};
100
/* Number of buckets in fbt_probetab; bounds the walk in fbt_doubletrap(). */
static int			fbt_probetab_size;
/* When non-zero, fbt_enable() reports probes it refuses to arm. */
static int			fbt_verbose = 0;
103
/*
 * Report whether the function called 'name' must be kept free of FBT
 * instrumentation.
 *
 * Returns 1 when the symbol is off limits and 0 when it may be probed.
 */
int
fbt_excluded(const char *name)
{
	/* Lock owner methods; these may be called from probe context. */
	static const char *const owner_methods[] = {
		"owner_mtx",
		"owner_rm",
		"owner_rw",
		"owner_sx"
	};
	size_t idx;

	/*
	 * A "dtrace_" prefix means the routine may run in probe context,
	 * unless it opts out explicitly with the longer "dtrace_safe_"
	 * prefix.
	 */
	if (strncmp(name, "dtrace_", 7) == 0) {
		if (strncmp(name, "dtrace_safe_", 12) != 0)
			return (1);
	}

	/*
	 * Functions that are probably part of DDB stay uninstrumented;
	 * otherwise a broken FBT becomes too hard to debug.
	 *
	 * NB: kdb_enter() can be excluded, but its call to printf() can't
	 * be.  This is generally OK since we're not yet in debugging
	 * context.
	 */
	if (strncmp(name, "db_", 3) == 0)
		return (1);
	if (strncmp(name, "kdb_", 4) == 0)
		return (1);

	for (idx = 0; idx < sizeof(owner_methods) / sizeof(owner_methods[0]);
	    idx++) {
		if (strcmp(name, owner_methods[idx]) == 0)
			return (1);
	}

#if defined(__aarch64__) || defined(__riscv)
	/*
	 * On these platforms the stack unwinder may be called from probe
	 * context.
	 */
	if (strcmp(name, "unwind_frame") == 0)
		return (1);
#endif

#ifndef _KLD_MODULE
	/*
	 * With DTrace compiled into the kernel, the FBT functions
	 * themselves are visible and must not be instrumented.
	 */
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
159
160static void
161fbt_doubletrap(void)
162{
163	fbt_probe_t *fbt;
164	int i;
165
166	for (i = 0; i < fbt_probetab_size; i++) {
167		fbt = fbt_probetab[i];
168
169		for (; fbt != NULL; fbt = fbt->fbtp_probenext)
170			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
171	}
172}
173
174static void
175fbt_provide_module(void *arg, modctl_t *lf)
176{
177	char modname[MAXPATHLEN];
178	int i;
179	size_t len;
180
181	strlcpy(modname, lf->filename, sizeof(modname));
182	len = strlen(modname);
183	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
184		modname[len - 3] = '\0';
185
186	/*
187	 * Employees of dtrace and their families are ineligible.  Void
188	 * where prohibited.
189	 */
190	if (strcmp(modname, "dtrace") == 0)
191		return;
192
193	/*
194	 * To register with DTrace, a module must list 'dtrace' as a
195	 * dependency in order for the kernel linker to resolve
196	 * symbols like dtrace_register(). All modules with such a
197	 * dependency are ineligible for FBT tracing.
198	 */
199	for (i = 0; i < lf->ndeps; i++)
200		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
201			return;
202
203	if (lf->fbt_nentries) {
204		/*
205		 * This module has some FBT entries allocated; we're afraid
206		 * to screw with it.
207		 */
208		return;
209	}
210
211	/*
212	 * List the functions in the module and the symbol values.
213	 */
214	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
215}
216
/*
 * Unlink a single probe from fbt_probetab and free it.
 *
 * The bucket for the probe is chosen by FBT_ADDR2NDX() on its patchpoint.
 * Entries in a bucket are chained through fbtp_hashnext; probes that share
 * a patchpoint hang off the bucket entry through fbtp_tracenext.  Panics if
 * the probe cannot be found in either chain.
 */
static void
fbt_destroy_one(fbt_probe_t *fbt)
{
	fbt_probe_t *hash, *hashprev, *next;
	int ndx;

	ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
	for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL;
	    hashprev = hash, hash = hash->fbtp_hashnext) {
		if (hash == fbt) {
			/*
			 * The probe is itself a bucket-chain entry.  If it
			 * has a trace-chain successor, that successor
			 * inherits its place in the bucket chain; otherwise
			 * the bucket chain simply skips over it.
			 */
			if ((next = fbt->fbtp_tracenext) != NULL)
				next->fbtp_hashnext = hash->fbtp_hashnext;
			else
				next = hash->fbtp_hashnext;
			if (hashprev != NULL)
				hashprev->fbtp_hashnext = next;
			else
				fbt_probetab[ndx] = next;
			goto free;
		} else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
			/*
			 * Same patchpoint but a different bucket entry: look
			 * for the probe's predecessor on the trace chain and
			 * splice the probe out.
			 */
			for (next = hash; next->fbtp_tracenext != NULL;
			    next = next->fbtp_tracenext) {
				if (fbt == next->fbtp_tracenext) {
					next->fbtp_tracenext =
					    fbt->fbtp_tracenext;
					goto free;
				}
			}
		}
	}
	panic("probe %p not found in hash table", fbt);
free:
	free(fbt, M_FBT);
}
251
252static void
253fbt_destroy(void *arg, dtrace_id_t id, void *parg)
254{
255	fbt_probe_t *fbt = parg, *next;
256	modctl_t *ctl;
257
258	do {
259		ctl = fbt->fbtp_ctl;
260		ctl->fbt_nentries--;
261
262		next = fbt->fbtp_probenext;
263		fbt_destroy_one(fbt);
264		fbt = next;
265	} while (fbt != NULL);
266}
267
268static void
269fbt_enable(void *arg, dtrace_id_t id, void *parg)
270{
271	fbt_probe_t *fbt = parg;
272	modctl_t *ctl = fbt->fbtp_ctl;
273
274	ctl->nenabled++;
275
276	/*
277	 * Now check that our modctl has the expected load count.  If it
278	 * doesn't, this module must have been unloaded and reloaded -- and
279	 * we're not going to touch it.
280	 */
281	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
282		if (fbt_verbose) {
283			printf("fbt is failing for probe %s "
284			    "(module %s reloaded)",
285			    fbt->fbtp_name, ctl->filename);
286		}
287
288		return;
289	}
290
291	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
292		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
293		fbt->fbtp_enabled++;
294	}
295}
296
297static void
298fbt_disable(void *arg, dtrace_id_t id, void *parg)
299{
300	fbt_probe_t *fbt = parg, *hash;
301	modctl_t *ctl = fbt->fbtp_ctl;
302
303	ASSERT(ctl->nenabled > 0);
304	ctl->nenabled--;
305
306	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
307		return;
308
309	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
310		fbt->fbtp_enabled--;
311
312		for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)];
313		    hash != NULL; hash = hash->fbtp_hashnext) {
314			if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
315				for (; hash != NULL; hash = hash->fbtp_tracenext)
316					if (hash->fbtp_enabled > 0)
317						break;
318				break;
319			}
320		}
321		if (hash == NULL)
322			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
323	}
324}
325
326static void
327fbt_suspend(void *arg, dtrace_id_t id, void *parg)
328{
329	fbt_probe_t *fbt = parg;
330	modctl_t *ctl = fbt->fbtp_ctl;
331
332	ASSERT(ctl->nenabled > 0);
333
334	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
335		return;
336
337	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
338		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
339}
340
341static void
342fbt_resume(void *arg, dtrace_id_t id, void *parg)
343{
344	fbt_probe_t *fbt = parg;
345	modctl_t *ctl = fbt->fbtp_ctl;
346
347	ASSERT(ctl->nenabled > 0);
348
349	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
350		return;
351
352	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
353		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
354}
355
356static int
357fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
358{
359	const Elf_Sym *symp = lc->symtab;
360	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
361	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
362	size_t idwidth;
363	int i;
364	uint32_t *ctfoff;
365	uint32_t objtoff = hp->cth_objtoff;
366	uint32_t funcoff = hp->cth_funcoff;
367	uint_t kind, info, vlen;
368
369	/* Sanity check. */
370	if (hp->cth_magic != CTF_MAGIC) {
371		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
372		return (EINVAL);
373	}
374
375	if (lc->symtab == NULL) {
376		printf("No symbol table in '%s'\n",lf->pathname);
377		return (EINVAL);
378	}
379
380	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
381	*lc->ctfoffp = ctfoff;
382
383	idwidth = hp->cth_version == CTF_VERSION_2 ? 2 : 4;
384
385	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
386		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
387			*ctfoff = 0xffffffff;
388			continue;
389		}
390
391		switch (ELF_ST_TYPE(symp->st_info)) {
392		case STT_OBJECT:
393			if (objtoff >= hp->cth_funcoff ||
394			    (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
395				*ctfoff = 0xffffffff;
396				break;
397			}
398
399			*ctfoff = objtoff;
400			objtoff += idwidth;
401			break;
402
403		case STT_FUNC:
404			if (funcoff >= hp->cth_typeoff) {
405				*ctfoff = 0xffffffff;
406				break;
407			}
408
409			*ctfoff = funcoff;
410
411			info = 0;
412			memcpy(&info, ctfdata + funcoff, idwidth);
413			if (hp->cth_version == CTF_VERSION_2) {
414				kind = CTF_V2_INFO_KIND(info);
415				vlen = CTF_V2_INFO_VLEN(info);
416			} else {
417				kind = CTF_V3_INFO_KIND(info);
418				vlen = CTF_V3_INFO_VLEN(info);
419			}
420
421			/*
422			 * If we encounter a zero pad at the end, just skip it.
423			 * Otherwise skip over the function and its return type
424			 * (+2) and the argument list (vlen).
425			 */
426			if (kind == CTF_K_UNKNOWN && vlen == 0)
427				funcoff += idwidth;
428			else
429				funcoff += idwidth * (vlen + 2);
430			break;
431
432		default:
433			*ctfoff = 0xffffffff;
434			break;
435		}
436	}
437
438	return (0);
439}
440
441static void
442fbt_get_ctt_index(uint8_t version, const void *v, uint_t *indexp,
443    uint_t *typep, int *ischildp)
444{
445	uint_t index, type;
446	int ischild;
447
448	if (version == CTF_VERSION_2) {
449		const struct ctf_type_v2 *ctt = v;
450
451		type = ctt->ctt_type;
452		index = CTF_V2_TYPE_TO_INDEX(ctt->ctt_type);
453		ischild = CTF_V2_TYPE_ISCHILD(ctt->ctt_type);
454	} else {
455		const struct ctf_type_v3 *ctt = v;
456
457		type = ctt->ctt_type;
458		index = CTF_V3_TYPE_TO_INDEX(ctt->ctt_type);
459		ischild = CTF_V3_TYPE_ISCHILD(ctt->ctt_type);
460	}
461
462	if (indexp != NULL)
463		*indexp = index;
464	if (typep != NULL)
465		*typep = type;
466	if (ischildp != NULL)
467		*ischildp = ischild;
468}
469
470static ssize_t
471fbt_get_ctt_size(uint8_t version, const void *tp, ssize_t *sizep,
472    ssize_t *incrementp)
473{
474	ssize_t size, increment;
475
476	if (version == CTF_VERSION_2) {
477		const struct ctf_type_v2 *ctt = tp;
478
479		if (ctt->ctt_size == CTF_V2_LSIZE_SENT) {
480			size = CTF_TYPE_LSIZE(ctt);
481			increment = sizeof (struct ctf_type_v2);
482		} else {
483			size = ctt->ctt_size;
484			increment = sizeof (struct ctf_stype_v2);
485		}
486	} else {
487		const struct ctf_type_v3 *ctt = tp;
488
489		if (ctt->ctt_size == CTF_V3_LSIZE_SENT) {
490			size = CTF_TYPE_LSIZE(ctt);
491			increment = sizeof (struct ctf_type_v3);
492		} else {
493			size = ctt->ctt_size;
494			increment = sizeof (struct ctf_stype_v3);
495		}
496	}
497
498	if (sizep)
499		*sizep = size;
500	if (incrementp)
501		*incrementp = increment;
502
503	return (size);
504}
505
506static void
507fbt_get_ctt_info(uint8_t version, const void *tp, uint_t *kindp, uint_t *vlenp,
508    int *isrootp)
509{
510	uint_t kind, vlen;
511	int isroot;
512
513	if (version == CTF_VERSION_2) {
514		const struct ctf_type_v2 *ctt = tp;
515
516		kind = CTF_V2_INFO_KIND(ctt->ctt_info);
517		vlen = CTF_V2_INFO_VLEN(ctt->ctt_info);
518		isroot = CTF_V2_INFO_ISROOT(ctt->ctt_info);
519	} else {
520		const struct ctf_type_v3 *ctt = tp;
521
522		kind = CTF_V3_INFO_KIND(ctt->ctt_info);
523		vlen = CTF_V3_INFO_VLEN(ctt->ctt_info);
524		isroot = CTF_V3_INFO_ISROOT(ctt->ctt_info);
525	}
526
527	if (kindp != NULL)
528		*kindp = kind;
529	if (vlenp != NULL)
530		*vlenp = vlen;
531	if (isrootp != NULL)
532		*isrootp = isroot;
533}
534
535static int
536fbt_typoff_init(linker_ctf_t *lc)
537{
538	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
539	const void *tbuf, *tend, *tp;
540	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
541	size_t idwidth;
542	int ctf_typemax = 0;
543	uint32_t *xp;
544	ulong_t pop[CTF_K_MAX + 1] = { 0 };
545	uint8_t version;
546
547	/* Sanity check. */
548	if (hp->cth_magic != CTF_MAGIC)
549		return (EINVAL);
550
551	version = hp->cth_version;
552	idwidth = version == CTF_VERSION_2 ? 2 : 4;
553
554	tbuf = (const void *) (ctfdata + hp->cth_typeoff);
555	tend = (const void *) (ctfdata + hp->cth_stroff);
556
557	/*
558	 * We make two passes through the entire type section.  In this first
559	 * pass, we count the number of each type and the total number of types.
560	 */
561	for (tp = tbuf; tp < tend; ctf_typemax++) {
562		uint_t kind, type, vlen;
563		ssize_t size, increment;
564		size_t vbytes;
565
566		(void) fbt_get_ctt_size(version, tp, &size, &increment);
567		fbt_get_ctt_info(version, tp, &kind, &vlen, NULL);
568		fbt_get_ctt_index(version, tp, NULL, &type, NULL);
569
570		switch (kind) {
571		case CTF_K_INTEGER:
572		case CTF_K_FLOAT:
573			vbytes = sizeof (uint_t);
574			break;
575		case CTF_K_ARRAY:
576			if (version == CTF_VERSION_2)
577				vbytes = sizeof (struct ctf_array_v2);
578			else
579				vbytes = sizeof (struct ctf_array_v3);
580			break;
581		case CTF_K_FUNCTION:
582			vbytes = roundup2(idwidth * vlen, sizeof(uint32_t));
583			break;
584		case CTF_K_STRUCT:
585		case CTF_K_UNION:
586			if (version == CTF_VERSION_2) {
587				if (size < CTF_V2_LSTRUCT_THRESH)
588					vbytes =
589					    sizeof (struct ctf_member_v2) * vlen;
590				else
591					vbytes =
592					    sizeof (struct ctf_lmember_v2) * vlen;
593			} else {
594				if (size < CTF_V3_LSTRUCT_THRESH)
595					vbytes =
596					    sizeof (struct ctf_member_v3) * vlen;
597				else
598					vbytes =
599					    sizeof (struct ctf_lmember_v3) * vlen;
600			}
601			break;
602		case CTF_K_ENUM:
603			vbytes = sizeof (ctf_enum_t) * vlen;
604			break;
605		case CTF_K_FORWARD:
606			/*
607			 * For forward declarations, ctt_type is the CTF_K_*
608			 * kind for the tag, so bump that population count too.
609			 * If ctt_type is unknown, treat the tag as a struct.
610			 */
611			if (type == CTF_K_UNKNOWN || type >= CTF_K_MAX)
612				pop[CTF_K_STRUCT]++;
613			else
614				pop[type]++;
615			/*FALLTHRU*/
616		case CTF_K_UNKNOWN:
617			vbytes = 0;
618			break;
619		case CTF_K_POINTER:
620		case CTF_K_TYPEDEF:
621		case CTF_K_VOLATILE:
622		case CTF_K_CONST:
623		case CTF_K_RESTRICT:
624			vbytes = 0;
625			break;
626		default:
627			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
628			return (EIO);
629		}
630		tp = (const void *)((uintptr_t)tp + increment + vbytes);
631		pop[kind]++;
632	}
633
634	/* account for a sentinel value below */
635	ctf_typemax++;
636	*lc->typlenp = ctf_typemax;
637
638	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
639	    M_ZERO | M_WAITOK);
640
641	*lc->typoffp = xp;
642
643	/* type id 0 is used as a sentinel value */
644	*xp++ = 0;
645
646	/*
647	 * In the second pass, fill in the type offset.
648	 */
649	for (tp = tbuf; tp < tend; xp++) {
650		ssize_t size, increment;
651		uint_t kind, vlen;
652
653		size_t vbytes;
654
655		(void) fbt_get_ctt_size(version, tp, &size, &increment);
656		fbt_get_ctt_info(version, tp, &kind, &vlen, NULL);
657
658		switch (kind) {
659		case CTF_K_INTEGER:
660		case CTF_K_FLOAT:
661			vbytes = sizeof (uint_t);
662			break;
663		case CTF_K_ARRAY:
664			if (version == CTF_VERSION_2)
665				vbytes = sizeof (struct ctf_array_v2);
666			else
667				vbytes = sizeof (struct ctf_array_v3);
668			break;
669		case CTF_K_FUNCTION:
670			vbytes = roundup2(idwidth * vlen, sizeof(uint32_t));
671			break;
672		case CTF_K_STRUCT:
673		case CTF_K_UNION:
674			if (version == CTF_VERSION_2) {
675				if (size < CTF_V2_LSTRUCT_THRESH)
676					vbytes =
677					    sizeof (struct ctf_member_v2) * vlen;
678				else
679					vbytes =
680					    sizeof (struct ctf_lmember_v2) * vlen;
681			} else {
682				if (size < CTF_V3_LSTRUCT_THRESH)
683					vbytes =
684					    sizeof (struct ctf_member_v3) * vlen;
685				else
686					vbytes =
687					    sizeof (struct ctf_lmember_v3) * vlen;
688			}
689			break;
690		case CTF_K_ENUM:
691			vbytes = sizeof (ctf_enum_t) * vlen;
692			break;
693		case CTF_K_FORWARD:
694		case CTF_K_UNKNOWN:
695			vbytes = 0;
696			break;
697		case CTF_K_POINTER:
698		case CTF_K_TYPEDEF:
699		case CTF_K_VOLATILE:
700		case CTF_K_CONST:
701		case CTF_K_RESTRICT:
702			vbytes = 0;
703			break;
704		default:
705			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
706			return (EIO);
707		}
708		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
709		tp = (const void *)((uintptr_t)tp + increment + vbytes);
710	}
711
712	return (0);
713}
714
715/*
716 * CTF Declaration Stack
717 *
718 * In order to implement ctf_type_name(), we must convert a type graph back
719 * into a C type declaration.  Unfortunately, a type graph represents a storage
720 * class ordering of the type whereas a type declaration must obey the C rules
721 * for operator precedence, and the two orderings are frequently in conflict.
722 * For example, consider these CTF type graphs and their C declarations:
723 *
724 * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
725 * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
726 *
727 * In each case, parentheses are used to raise operator * to higher lexical
728 * precedence, so the string form of the C declaration cannot be constructed by
729 * walking the type graph links and forming the string from left to right.
730 *
731 * The functions in this file build a set of stacks from the type graph nodes
732 * corresponding to the C operator precedence levels in the appropriate order.
733 * The code in ctf_type_name() can then iterate over the levels and nodes in
734 * lexical precedence order and construct the final C declaration string.
735 */
/*
 * Link fields for the doubly-linked lists below.  The same structure serves
 * two roles: embedded as the first member of a list element it holds that
 * element's neighbours, and used standalone as a list head it holds the
 * tail (l_prev) and head (l_next) of the list.
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

/*
 * Accessors for the link fields.  'elem' is treated as a ctf_list_t, so it
 * must be a list head or a structure that begins with one.
 */
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
743
/*
 * Precedence levels used to bucket declaration nodes.  CTF_PREC_MAX is not
 * a level; it is the number of levels and sizes the per-level arrays in
 * ctf_decl_t.
 */
typedef enum {
	CTF_PREC_BASE,		/* named typedefs and all other base kinds */
	CTF_PREC_POINTER,	/* pointers */
	CTF_PREC_ARRAY,		/* arrays */
	CTF_PREC_FUNCTION,	/* functions */
	CTF_PREC_MAX
} ctf_decl_prec_t;
751
/*
 * One node of a declaration stack.  ctf_decl_push() allocates a node for
 * each type it records and ctf_decl_fini() frees them.  cd_list must stay
 * the first member so the node can be linked with the ctf_list routines.
 */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;
758
/*
 * State for turning one type into a C declaration string: the per-level
 * node stacks built by ctf_decl_push(), plus the output buffer that
 * ctf_decl_sprintf() writes into.
 */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* qualifier precedence */
	ctf_decl_prec_t cd_ordp;		/* next storage-order value */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
770
771/*
772 * Simple doubly-linked list append routine.  This implementation assumes that
773 * each list element contains an embedded ctf_list_t as the first member.
774 * An additional ctf_list_t is used to store the head (l_next) and tail
775 * (l_prev) pointers.  The current head and tail list elements have their
776 * previous and next pointers set to NULL, respectively.
777 */
778static void
779ctf_list_append(ctf_list_t *lp, void *new)
780{
781	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
782	ctf_list_t *q = new;		/* q = new list element */
783
784	lp->l_prev = q;
785	q->l_prev = p;
786	q->l_next = NULL;
787
788	if (p != NULL)
789		p->l_next = q;
790	else
791		lp->l_next = q;
792}
793
794/*
795 * Prepend the specified existing element to the given ctf_list_t.  The
796 * existing pointer should be pointing at a struct with embedded ctf_list_t.
797 */
798static void
799ctf_list_prepend(ctf_list_t *lp, void *new)
800{
801	ctf_list_t *p = new;		/* p = new list element */
802	ctf_list_t *q = lp->l_next;	/* q = head list element */
803
804	lp->l_next = p;
805	p->l_prev = NULL;
806	p->l_next = q;
807
808	if (q != NULL)
809		q->l_prev = p;
810	else
811		lp->l_prev = p;
812}
813
814static void
815ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
816{
817	int i;
818
819	bzero(cd, sizeof (ctf_decl_t));
820
821	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
822		cd->cd_order[i] = CTF_PREC_BASE - 1;
823
824	cd->cd_qualp = CTF_PREC_BASE;
825	cd->cd_ordp = CTF_PREC_BASE;
826
827	cd->cd_buf = buf;
828	cd->cd_ptr = buf;
829	cd->cd_end = buf + len;
830}
831
832static void
833ctf_decl_fini(ctf_decl_t *cd)
834{
835	ctf_decl_node_t *cdp, *ndp;
836	int i;
837
838	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
839		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
840		    cdp != NULL; cdp = ndp) {
841			ndp = ctf_list_next(cdp);
842			free(cdp, M_FBT);
843		}
844	}
845}
846
847static const void *
848ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
849{
850	const void *tp;
851	uint32_t offset;
852	uint32_t *typoff = *lc->typoffp;
853
854	if (type >= *lc->typlenp) {
855		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
856		return(NULL);
857	}
858
859	/* Check if the type isn't cross-referenced. */
860	if ((offset = typoff[type]) == 0) {
861		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
862		return(NULL);
863	}
864
865	tp = (const void *) (lc->ctftab + offset + sizeof(ctf_header_t));
866
867	return (tp);
868}
869
870static void
871fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
872{
873	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
874	const void *tp;
875	ssize_t increment;
876	uint_t kind;
877
878	bzero(arp, sizeof(*arp));
879
880	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
881		return;
882
883	fbt_get_ctt_info(hp->cth_version, tp, &kind, NULL, NULL);
884	if (kind != CTF_K_ARRAY)
885		return;
886
887	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
888
889	if (hp->cth_version == CTF_VERSION_2) {
890		const struct ctf_array_v2 *ap;
891
892		ap = (const struct ctf_array_v2 *)((uintptr_t)tp + increment);
893		arp->ctr_contents = ap->cta_contents;
894		arp->ctr_index = ap->cta_index;
895		arp->ctr_nelems = ap->cta_nelems;
896	} else {
897		const struct ctf_array_v3 *ap;
898
899		ap = (const struct ctf_array_v3 *)((uintptr_t)tp + increment);
900		arp->ctr_contents = ap->cta_contents;
901		arp->ctr_index = ap->cta_index;
902		arp->ctr_nelems = ap->cta_nelems;
903	}
904}
905
906static const char *
907ctf_strptr(linker_ctf_t *lc, int name)
908{
909	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
910	const char *strp = "";
911
912	if (name < 0 || name >= hp->cth_strlen)
913		return(strp);
914
915	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
916
917	return (strp);
918}
919
920static const char *
921ctf_type_rname(linker_ctf_t *lc, const void *v)
922{
923	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
924	uint_t name;
925
926	if (hp->cth_version == CTF_VERSION_2) {
927		const struct ctf_type_v2 *ctt = v;
928
929		name = ctt->ctt_name;
930	} else {
931		const struct ctf_type_v3 *ctt = v;
932
933		name = ctt->ctt_name;
934	}
935
936	return (ctf_strptr(lc, name));
937}
938
/*
 * Record type 'type', and recursively the types it refers to, on the
 * declaration stacks of 'cd'.
 *
 * Referenced types are pushed before the referring type.  Each pushed type
 * gets a node (allocated with M_FBT) on the stack for its precedence level.
 * If the type cannot be looked up, cd->cd_err is set to ENOENT and nothing
 * is pushed for it.
 */
static void
ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
{
	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec;
	uint_t kind, n = 1, t;
	int is_qual = 0;

	const void *tp;
	ctf_arinfo_t ar;

	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
		cd->cd_err = ENOENT;
		return;
	}

	/* 'kind' is this type's kind; 't' is the raw ctt_type it refers to. */
	fbt_get_ctt_info(hp->cth_version, tp, &kind, NULL, NULL);
	fbt_get_ctt_index(hp->cth_version, tp, NULL, &t, NULL);

	switch (kind) {
	case CTF_K_ARRAY:
		/* Push the element type first; remember the element count. */
		fbt_array_info(lc, type, &ar);
		ctf_decl_push(cd, lc, ar.ctr_contents);
		n = ar.ctr_nelems;
		prec = CTF_PREC_ARRAY;
		break;

	case CTF_K_TYPEDEF:
		/* A nameless typedef is transparent: push only its target. */
		if (ctf_type_rname(lc, tp)[0] == '\0') {
			ctf_decl_push(cd, lc, t);
			return;
		}
		prec = CTF_PREC_BASE;
		break;

	case CTF_K_FUNCTION:
		ctf_decl_push(cd, lc, t);
		prec = CTF_PREC_FUNCTION;
		break;

	case CTF_K_POINTER:
		ctf_decl_push(cd, lc, t);
		prec = CTF_PREC_POINTER;
		break;

	case CTF_K_VOLATILE:
	case CTF_K_CONST:
	case CTF_K_RESTRICT:
		/*
		 * A qualifier takes the current qualifier level, which the
		 * recursive push above may just have raised.
		 */
		ctf_decl_push(cd, lc, t);
		prec = cd->cd_qualp;
		is_qual++;
		break;

	default:
		prec = CTF_PREC_BASE;
	}

	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
	cdp->cd_type = type;
	cdp->cd_kind = kind;
	cdp->cd_n = n;

	/* The first node on a level fixes that level's storage order. */
	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
		cd->cd_order[prec] = cd->cd_ordp++;

	/*
	 * Reset cd_qualp to the highest precedence level that we've seen so
	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
	 */
	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
		cd->cd_qualp = prec;

	/*
	 * C array declarators are ordered inside out so prepend them.  Also by
	 * convention qualifiers of base types precede the type specifier (e.g.
	 * const int vs. int const) even though the two forms are equivalent.
	 */
	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
	else
		ctf_list_append(&cd->cd_nodes[prec], cdp);
}
1022
1023static void
1024ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
1025{
1026	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
1027	va_list ap;
1028	size_t n;
1029
1030	va_start(ap, format);
1031	n = vsnprintf(cd->cd_ptr, len, format, ap);
1032	va_end(ap);
1033
1034	cd->cd_ptr += MIN(n, len);
1035	cd->cd_len += n;
1036}
1037
/*
 * Render CTF type 'type' as a C declaration string in 'buf'.
 *
 * lc   - CTF description of the module the type belongs to
 * type - CTF type id to render
 * buf  - output buffer
 * len  - size of 'buf' in bytes
 *
 * Returns the accumulated formatted length (cd_len, as maintained by
 * ctf_decl_sprintf()), or -1 if the type or any type it refers to could not
 * be looked up.
 */
static ssize_t
fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
{
	ctf_decl_t cd;
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec, lp, rp;
	int ptr, arr;
	uint_t k;

	if (lc == NULL && type == CTF_ERR)
		return (-1); /* simplify caller code by permitting CTF_ERR */

	/* Build the per-precedence node stacks for the whole type graph. */
	ctf_decl_init(&cd, buf, len);
	ctf_decl_push(&cd, lc, type);

	if (cd.cd_err != 0) {
		ctf_decl_fini(&cd);
		return (-1);
	}

	/*
	 * If the type graph's order conflicts with lexical precedence order
	 * for pointers or arrays, then we need to surround the declarations at
	 * the corresponding lexical precedence with parentheses.  This can
	 * result in either a parenthesized pointer (*) as in int (*)() or
	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
	 */
	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;

	/* lp/rp: levels at which "(" and ")" are emitted; -1 for neither. */
	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;

	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */

	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
		    cdp != NULL; cdp = ctf_list_next(cdp)) {

			const void *tp = ctf_lookup_by_id(lc, cdp->cd_type);
			const char *name = ctf_type_rname(lc, tp);

			/* Separate tokens unless the previous was '*' or '[n]'. */
			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
				ctf_decl_sprintf(&cd, " ");

			/* Open the parenthesis once, at the first node of lp. */
			if (lp == prec) {
				ctf_decl_sprintf(&cd, "(");
				lp = -1;
			}

			switch (cdp->cd_kind) {
			case CTF_K_INTEGER:
			case CTF_K_FLOAT:
			case CTF_K_TYPEDEF:
				ctf_decl_sprintf(&cd, "%s", name);
				break;
			case CTF_K_POINTER:
				ctf_decl_sprintf(&cd, "*");
				break;
			case CTF_K_ARRAY:
				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
				break;
			case CTF_K_FUNCTION:
				ctf_decl_sprintf(&cd, "()");
				break;
			case CTF_K_STRUCT:
			case CTF_K_FORWARD:
				ctf_decl_sprintf(&cd, "struct %s", name);
				break;
			case CTF_K_UNION:
				ctf_decl_sprintf(&cd, "union %s", name);
				break;
			case CTF_K_ENUM:
				ctf_decl_sprintf(&cd, "enum %s", name);
				break;
			case CTF_K_VOLATILE:
				ctf_decl_sprintf(&cd, "volatile");
				break;
			case CTF_K_CONST:
				ctf_decl_sprintf(&cd, "const");
				break;
			case CTF_K_RESTRICT:
				ctf_decl_sprintf(&cd, "restrict");
				break;
			}

			k = cdp->cd_kind;
		}

		/* Close the parenthesis after the last node of level rp. */
		if (rp == prec)
			ctf_decl_sprintf(&cd, ")");
	}

	/* ctf_decl_fini() frees only the nodes, so cd_len is still valid. */
	ctf_decl_fini(&cd);
	return (cd.cd_len);
}
1134
1135static void
1136fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1137{
1138	const ctf_header_t *hp;
1139	const char *dp;
1140	fbt_probe_t *fbt = parg;
1141	linker_ctf_t lc;
1142	modctl_t *ctl = fbt->fbtp_ctl;
1143	size_t idwidth;
1144	int ndx = desc->dtargd_ndx;
1145	int symindx = fbt->fbtp_symindx;
1146	uint32_t *ctfoff;
1147	uint32_t offset, type;
1148	uint_t info, n;
1149	ushort_t kind;
1150
1151	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1152		(void) strcpy(desc->dtargd_native, "int");
1153		return;
1154	}
1155
1156	desc->dtargd_ndx = DTRACE_ARGNONE;
1157
	/* Get a pointer to the CTF data and its length. */
1159	if (linker_ctf_get(ctl, &lc) != 0)
1160		/* No CTF data? Something wrong? *shrug* */
1161		return;
1162
1163	/* Check if this module hasn't been initialised yet. */
1164	if (*lc.ctfoffp == NULL) {
1165		/*
1166		 * Initialise the CTF object and function symindx to
1167		 * byte offset array.
1168		 */
1169		if (fbt_ctfoff_init(ctl, &lc) != 0)
1170			return;
1171
1172		/* Initialise the CTF type to byte offset array. */
1173		if (fbt_typoff_init(&lc) != 0)
1174			return;
1175	}
1176
1177	ctfoff = *lc.ctfoffp;
1178
1179	if (ctfoff == NULL || *lc.typoffp == NULL)
1180		return;
1181
1182	/* Check if the symbol index is out of range. */
1183	if (symindx >= lc.nsym)
1184		return;
1185
1186	/* Check if the symbol isn't cross-referenced. */
1187	if ((offset = ctfoff[symindx]) == 0xffffffff)
1188		return;
1189
1190	hp = (const ctf_header_t *) lc.ctftab;
1191	idwidth = hp->cth_version == CTF_VERSION_2 ? 2 : 4;
1192	dp = (const char *)(lc.ctftab + offset + sizeof(ctf_header_t));
1193
1194	info = 0;
1195	memcpy(&info, dp, idwidth);
1196	dp += idwidth;
1197	if (hp->cth_version == CTF_VERSION_2) {
1198		kind = CTF_V2_INFO_KIND(info);
1199		n = CTF_V2_INFO_VLEN(info);
1200	} else {
1201		kind = CTF_V3_INFO_KIND(info);
1202		n = CTF_V3_INFO_VLEN(info);
1203	}
1204
1205	if (kind == CTF_K_UNKNOWN && n == 0) {
1206		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1207		return;
1208	}
1209
1210	if (kind != CTF_K_FUNCTION) {
1211		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1212		return;
1213	}
1214
1215	if (fbt->fbtp_roffset != 0) {
1216		/* Only return type is available for args[1] in return probe. */
1217		if (ndx > 1)
1218			return;
1219		ASSERT(ndx == 1);
1220	} else {
1221		/* Check if the requested argument doesn't exist. */
1222		if (ndx >= n)
1223			return;
1224
1225		/* Skip the return type and arguments up to the one requested. */
1226		dp += idwidth * (ndx + 1);
1227	}
1228
1229	type = 0;
1230	memcpy(&type, dp, idwidth);
1231	if (fbt_type_name(&lc, type, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1232		desc->dtargd_ndx = ndx;
1233}
1234
1235static int
1236fbt_linker_file_cb(linker_file_t lf, void *arg)
1237{
1238
1239	fbt_provide_module(arg, lf);
1240
1241	return (0);
1242}
1243
1244static void
1245fbt_load(void *dummy)
1246{
1247	/* Default the probe table size if not specified. */
1248	if (fbt_probetab_size == 0)
1249		fbt_probetab_size = FBT_PROBETAB_SIZE;
1250
1251	/* Choose the hash mask for the probe table. */
1252	fbt_probetab_mask = fbt_probetab_size - 1;
1253
1254	/* Allocate memory for the probe table. */
1255	fbt_probetab =
1256	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1257
1258	dtrace_doubletrap_func = fbt_doubletrap;
1259	dtrace_invop_add(fbt_invop);
1260
1261	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1262	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1263		return;
1264
1265	/* Create probes for the kernel and already-loaded modules. */
1266	linker_file_foreach(fbt_linker_file_cb, NULL);
1267}
1268
1269static int
1270fbt_unload(void)
1271{
1272	int error = 0;
1273
1274	/* De-register the invalid opcode handler. */
1275	dtrace_invop_remove(fbt_invop);
1276
1277	dtrace_doubletrap_func = NULL;
1278
1279	/* De-register this DTrace provider. */
1280	if ((error = dtrace_unregister(fbt_id)) != 0)
1281		return (error);
1282
1283	/* Free the probe table. */
1284	free(fbt_probetab, M_FBT);
1285	fbt_probetab = NULL;
1286	fbt_probetab_mask = 0;
1287
1288	return (error);
1289}
1290
1291static int
1292fbt_modevent(module_t mod __unused, int type, void *data __unused)
1293{
1294	int error = 0;
1295
1296	switch (type) {
1297	case MOD_LOAD:
1298		break;
1299
1300	case MOD_UNLOAD:
1301		break;
1302
1303	case MOD_SHUTDOWN:
1304		break;
1305
1306	default:
1307		error = EOPNOTSUPP;
1308		break;
1309
1310	}
1311
1312	return (error);
1313}
1314
/*
 * fbt_load() and fbt_unload() are hooked in through SYSINIT/SYSUNINIT at
 * the SI_SUB_DTRACE_PROVIDER stage; fbt_modevent() itself performs no
 * set-up or teardown work.
 */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/*
 * Declare the fbt module with fbt_modevent() as its event handler, give
 * it version 1, and record its dependencies on the dtrace and opensolaris
 * modules.
 */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1322