/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25179193Sjb
26179193Sjb#pragma ident	"%Z%%M%	%I%	%E% SMI"
27179193Sjb
28179193Sjb#include <sys/dtrace.h>
29179193Sjb#include <sys/systrace.h>
30179193Sjb#include <sys/stat.h>
31179193Sjb#include <sys/systm.h>
32179193Sjb#include <sys/conf.h>
33179193Sjb#include <sys/ddi.h>
34179193Sjb#include <sys/sunddi.h>
35179193Sjb#include <sys/atomic.h>
36179193Sjb
/*
 * Artificial-frame count handed to dtrace_probe_create() for every probe
 * this provider creates.
 */
#define	SYSTRACE_ARTIFICIAL_FRAMES	1

/*
 * Each probe carries a single integer as its private argument.  The low
 * SYSTRACE_SHIFT bits hold the system call number; the bits above them are
 * non-zero for an entry probe and zero for a return probe.
 *
 *	SYSTRACE_ENTRY(id)	encode the entry probe for system call `id'
 *	SYSTRACE_RETURN(id)	encode the return probe for system call `id'
 *	SYSTRACE_ISENTRY(x)	non-zero iff `x' encodes an entry probe
 *	SYSTRACE_SYSNUM(x)	system call number encoded in `x'
 */
#define	SYSTRACE_SHIFT			16
#define	SYSTRACE_ISENTRY(x)		((int)(x) >> SYSTRACE_SHIFT)
#define	SYSTRACE_SYSNUM(x)		((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define	SYSTRACE_ENTRY(id)		((1 << SYSTRACE_SHIFT) | (id))
#define	SYSTRACE_RETURN(id)		(id)

/*
 * The encoding above is only unambiguous if every system call number fits
 * in the low SYSTRACE_SHIFT bits.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif
48179193Sjb
49179193Sjbstatic dev_info_t *systrace_devi;
50179193Sjbstatic dtrace_provider_id_t systrace_id;
51179193Sjb
52179193Sjbstatic void
53179193Sjbsystrace_init(struct sysent *actual, systrace_sysent_t **interposed)
54179193Sjb{
55179193Sjb	systrace_sysent_t *sysent = *interposed;
56179193Sjb	int i;
57179193Sjb
58179193Sjb	if (sysent == NULL) {
59179193Sjb		*interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
60179193Sjb		    NSYSCALL, KM_SLEEP);
61179193Sjb	}
62179193Sjb
63179193Sjb	for (i = 0; i < NSYSCALL; i++) {
64179193Sjb		struct sysent *a = &actual[i];
65179193Sjb		systrace_sysent_t *s = &sysent[i];
66179193Sjb
67179193Sjb		if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
68179193Sjb			continue;
69179193Sjb
70179193Sjb		if (a->sy_callc == dtrace_systrace_syscall)
71179193Sjb			continue;
72179193Sjb
73179193Sjb#ifdef _SYSCALL32_IMPL
74179193Sjb		if (a->sy_callc == dtrace_systrace_syscall32)
75179193Sjb			continue;
76179193Sjb#endif
77179193Sjb
78179193Sjb		s->stsy_underlying = a->sy_callc;
79179193Sjb	}
80179193Sjb}
81179193Sjb
82179193Sjb/*ARGSUSED*/
83179193Sjbstatic void
84179193Sjbsystrace_provide(void *arg, const dtrace_probedesc_t *desc)
85179193Sjb{
86179193Sjb	int i;
87179193Sjb
88179193Sjb	if (desc != NULL)
89179193Sjb		return;
90179193Sjb
91179193Sjb	systrace_init(sysent, &systrace_sysent);
92179193Sjb#ifdef _SYSCALL32_IMPL
93179193Sjb	systrace_init(sysent32, &systrace_sysent32);
94179193Sjb#endif
95179193Sjb
96179193Sjb	for (i = 0; i < NSYSCALL; i++) {
97179193Sjb		if (systrace_sysent[i].stsy_underlying == NULL)
98179193Sjb			continue;
99179193Sjb
100179193Sjb		if (dtrace_probe_lookup(systrace_id, NULL,
101179193Sjb		    syscallnames[i], "entry") != 0)
102179193Sjb			continue;
103179193Sjb
104179193Sjb		(void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
105179193Sjb		    "entry", SYSTRACE_ARTIFICIAL_FRAMES,
106179193Sjb		    (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
107179193Sjb		(void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
108179193Sjb		    "return", SYSTRACE_ARTIFICIAL_FRAMES,
109179193Sjb		    (void *)((uintptr_t)SYSTRACE_RETURN(i)));
110179193Sjb
111179193Sjb		systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
112179193Sjb		systrace_sysent[i].stsy_return = DTRACE_IDNONE;
113179193Sjb#ifdef _SYSCALL32_IMPL
114179193Sjb		systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
115179193Sjb		systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
116179193Sjb#endif
117179193Sjb	}
118179193Sjb}
119179193Sjb
120179193Sjb/*ARGSUSED*/
121179193Sjbstatic void
122179193Sjbsystrace_destroy(void *arg, dtrace_id_t id, void *parg)
123179193Sjb{
124179193Sjb	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
125179193Sjb
126179193Sjb	/*
127179193Sjb	 * There's nothing to do here but assert that we have actually been
128179193Sjb	 * disabled.
129179193Sjb	 */
130179193Sjb	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
131179193Sjb		ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
132179193Sjb#ifdef _SYSCALL32_IMPL
133179193Sjb		ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
134179193Sjb#endif
135179193Sjb	} else {
136179193Sjb		ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
137179193Sjb#ifdef _SYSCALL32_IMPL
138179193Sjb		ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
139179193Sjb#endif
140179193Sjb	}
141179193Sjb}
142179193Sjb
143179193Sjb/*ARGSUSED*/
144179193Sjbstatic void
145179193Sjbsystrace_enable(void *arg, dtrace_id_t id, void *parg)
146179193Sjb{
147179193Sjb	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
148179193Sjb	int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
149179193Sjb	    systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
150179193Sjb
151179193Sjb	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
152179193Sjb		systrace_sysent[sysnum].stsy_entry = id;
153179193Sjb#ifdef _SYSCALL32_IMPL
154179193Sjb		systrace_sysent32[sysnum].stsy_entry = id;
155179193Sjb#endif
156179193Sjb	} else {
157179193Sjb		systrace_sysent[sysnum].stsy_return = id;
158179193Sjb#ifdef _SYSCALL32_IMPL
159179193Sjb		systrace_sysent32[sysnum].stsy_return = id;
160179193Sjb#endif
161179193Sjb	}
162179193Sjb
163179193Sjb	if (enabled) {
164179193Sjb		ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
165179193Sjb		return;
166179193Sjb	}
167179193Sjb
168179193Sjb	(void) casptr(&sysent[sysnum].sy_callc,
169179193Sjb	    (void *)systrace_sysent[sysnum].stsy_underlying,
170179193Sjb	    (void *)dtrace_systrace_syscall);
171179193Sjb#ifdef _SYSCALL32_IMPL
172179193Sjb	(void) casptr(&sysent32[sysnum].sy_callc,
173179193Sjb	    (void *)systrace_sysent32[sysnum].stsy_underlying,
174179193Sjb	    (void *)dtrace_systrace_syscall32);
175179193Sjb#endif
176179193Sjb}
177179193Sjb
178179193Sjb/*ARGSUSED*/
179179193Sjbstatic void
180179193Sjbsystrace_disable(void *arg, dtrace_id_t id, void *parg)
181179193Sjb{
182179193Sjb	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
183179193Sjb	int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
184179193Sjb	    systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
185179193Sjb
186179193Sjb	if (disable) {
187179193Sjb		(void) casptr(&sysent[sysnum].sy_callc,
188179193Sjb		    (void *)dtrace_systrace_syscall,
189179193Sjb		    (void *)systrace_sysent[sysnum].stsy_underlying);
190179193Sjb
191179193Sjb#ifdef _SYSCALL32_IMPL
192179193Sjb		(void) casptr(&sysent32[sysnum].sy_callc,
193179193Sjb		    (void *)dtrace_systrace_syscall32,
194179193Sjb		    (void *)systrace_sysent32[sysnum].stsy_underlying);
195179193Sjb#endif
196179193Sjb	}
197179193Sjb
198179193Sjb	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
199179193Sjb		systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
200179193Sjb#ifdef _SYSCALL32_IMPL
201179193Sjb		systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
202179193Sjb#endif
203179193Sjb	} else {
204179193Sjb		systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
205179193Sjb#ifdef _SYSCALL32_IMPL
206179193Sjb		systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
207179193Sjb#endif
208179193Sjb	}
209179193Sjb}
210179193Sjb
211179193Sjbstatic dtrace_pattr_t systrace_attr = {
212179193Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
213179193Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
214179193Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
215179193Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
216179193Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
217179193Sjb};
218179193Sjb
219179193Sjbstatic dtrace_pops_t systrace_pops = {
220179193Sjb	systrace_provide,
221179193Sjb	NULL,
222179193Sjb	systrace_enable,
223179193Sjb	systrace_disable,
224179193Sjb	NULL,
225179193Sjb	NULL,
226179193Sjb	NULL,
227179193Sjb	NULL,
228179193Sjb	NULL,
229179193Sjb	systrace_destroy
230179193Sjb};
231179193Sjb
232179193Sjbstatic int
233179193Sjbsystrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
234179193Sjb{
235179193Sjb	switch (cmd) {
236179193Sjb	case DDI_ATTACH:
237179193Sjb		break;
238179193Sjb	case DDI_RESUME:
239179193Sjb		return (DDI_SUCCESS);
240179193Sjb	default:
241179193Sjb		return (DDI_FAILURE);
242179193Sjb	}
243179193Sjb
244179193Sjb	systrace_probe = (void (*)())dtrace_probe;
245179193Sjb	membar_enter();
246179193Sjb
247179193Sjb	if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
248179193Sjb	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
249179193Sjb	    dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
250179193Sjb	    &systrace_pops, NULL, &systrace_id) != 0) {
251179193Sjb		systrace_probe = systrace_stub;
252179193Sjb		ddi_remove_minor_node(devi, NULL);
253179193Sjb		return (DDI_FAILURE);
254179193Sjb	}
255179193Sjb
256179193Sjb	ddi_report_dev(devi);
257179193Sjb	systrace_devi = devi;
258179193Sjb
259179193Sjb	return (DDI_SUCCESS);
260179193Sjb}
261179193Sjb
262179193Sjbstatic int
263179193Sjbsystrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
264179193Sjb{
265179193Sjb	switch (cmd) {
266179193Sjb	case DDI_DETACH:
267179193Sjb		break;
268179193Sjb	case DDI_SUSPEND:
269179193Sjb		return (DDI_SUCCESS);
270179193Sjb	default:
271179193Sjb		return (DDI_FAILURE);
272179193Sjb	}
273179193Sjb
274179193Sjb	if (dtrace_unregister(systrace_id) != 0)
275179193Sjb		return (DDI_FAILURE);
276179193Sjb
277179193Sjb	ddi_remove_minor_node(devi, NULL);
278179193Sjb	systrace_probe = systrace_stub;
279179193Sjb	return (DDI_SUCCESS);
280179193Sjb}
281179193Sjb
282179193Sjb/*ARGSUSED*/
283179193Sjbstatic int
284179193Sjbsystrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
285179193Sjb{
286179193Sjb	int error;
287179193Sjb
288179193Sjb	switch (infocmd) {
289179193Sjb	case DDI_INFO_DEVT2DEVINFO:
290179193Sjb		*result = (void *)systrace_devi;
291179193Sjb		error = DDI_SUCCESS;
292179193Sjb		break;
293179193Sjb	case DDI_INFO_DEVT2INSTANCE:
294179193Sjb		*result = (void *)0;
295179193Sjb		error = DDI_SUCCESS;
296179193Sjb		break;
297179193Sjb	default:
298179193Sjb		error = DDI_FAILURE;
299179193Sjb	}
300179193Sjb	return (error);
301179193Sjb}
302179193Sjb
303179193Sjb/*ARGSUSED*/
304179193Sjbstatic int
305179193Sjbsystrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
306179193Sjb{
307179193Sjb	return (0);
308179193Sjb}
309179193Sjb
310179193Sjbstatic struct cb_ops systrace_cb_ops = {
311179193Sjb	systrace_open,		/* open */
312179193Sjb	nodev,			/* close */
313179193Sjb	nulldev,		/* strategy */
314179193Sjb	nulldev,		/* print */
315179193Sjb	nodev,			/* dump */
316179193Sjb	nodev,			/* read */
317179193Sjb	nodev,			/* write */
318179193Sjb	nodev,			/* ioctl */
319179193Sjb	nodev,			/* devmap */
320179193Sjb	nodev,			/* mmap */
321179193Sjb	nodev,			/* segmap */
322179193Sjb	nochpoll,		/* poll */
323179193Sjb	ddi_prop_op,		/* cb_prop_op */
324179193Sjb	0,			/* streamtab  */
325179193Sjb	D_NEW | D_MP		/* Driver compatibility flag */
326179193Sjb};
327179193Sjb
328179193Sjbstatic struct dev_ops systrace_ops = {
329179193Sjb	DEVO_REV,		/* devo_rev, */
330179193Sjb	0,			/* refcnt  */
331179193Sjb	systrace_info,		/* get_dev_info */
332179193Sjb	nulldev,		/* identify */
333179193Sjb	nulldev,		/* probe */
334179193Sjb	systrace_attach,	/* attach */
335179193Sjb	systrace_detach,	/* detach */
336179193Sjb	nodev,			/* reset */
337179193Sjb	&systrace_cb_ops,	/* driver operations */
338179193Sjb	NULL,			/* bus operations */
339179193Sjb	nodev			/* dev power */
340179193Sjb};
341179193Sjb
342179193Sjb/*
343179193Sjb * Module linkage information for the kernel.
344179193Sjb */
345179193Sjbstatic struct modldrv modldrv = {
346179193Sjb	&mod_driverops,		/* module type (this is a pseudo driver) */
347179193Sjb	"System Call Tracing",	/* name of module */
348179193Sjb	&systrace_ops,		/* driver ops */
349179193Sjb};
350179193Sjb
351179193Sjbstatic struct modlinkage modlinkage = {
352179193Sjb	MODREV_1,
353179193Sjb	(void *)&modldrv,
354179193Sjb	NULL
355179193Sjb};
356179193Sjb
357179193Sjbint
358179193Sjb_init(void)
359179193Sjb{
360179193Sjb	return (mod_install(&modlinkage));
361179193Sjb}
362179193Sjb
363179193Sjbint
364179193Sjb_info(struct modinfo *modinfop)
365179193Sjb{
366179193Sjb	return (mod_info(&modlinkage, modinfop));
367179193Sjb}
368179193Sjb
369179193Sjbint
370179193Sjb_fini(void)
371179193Sjb{
372179193Sjb	return (mod_remove(&modlinkage));
373179193Sjb}
374