1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27#include <sys/dtrace.h>
28#include <sys/systrace.h>
29#include <sys/stat.h>
30#include <sys/systm.h>
31#include <sys/conf.h>
32#include <sys/ddi.h>
33#include <sys/sunddi.h>
34#include <sys/atomic.h>
35
/*
 * Artificial-frame count passed to dtrace_probe_create() for every probe
 * this provider creates.
 */
#define	SYSTRACE_ARTIFICIAL_FRAMES	1

/*
 * Encoding of the per-probe private argument.  The system call number lives
 * in the low SYSTRACE_SHIFT bits.  An entry probe additionally has bit
 * SYSTRACE_SHIFT set; a return probe carries the bare system call number.
 *
 *	SYSTRACE_ENTRY(id)	build the argument for an entry probe
 *	SYSTRACE_RETURN(id)	build the argument for a return probe
 *	SYSTRACE_ISENTRY(x)	non-zero if x was built by SYSTRACE_ENTRY()
 *	SYSTRACE_SYSNUM(x)	recover the system call number from x
 */
#define	SYSTRACE_SHIFT			16
#define	SYSTRACE_ISENTRY(x)		((int)(x) >> SYSTRACE_SHIFT)
#define	SYSTRACE_SYSNUM(x)		((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define	SYSTRACE_ENTRY(id)		((1 << SYSTRACE_SHIFT) | (id))
#define	SYSTRACE_RETURN(id)		(id)

/*
 * The encoding only works if every system call number fits below bit
 * SYSTRACE_SHIFT, so refuse to build otherwise.
 */
#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif
47
/* Device node saved by systrace_attach() and reported by systrace_info(). */
static dev_info_t *systrace_devi;
/* Provider handle filled in by dtrace_register() in systrace_attach(). */
static dtrace_provider_id_t systrace_id;
50
51static void
52systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
53{
54	systrace_sysent_t *sysent = *interposed;
55	int i;
56
57	if (sysent == NULL) {
58		*interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
59		    NSYSCALL, KM_SLEEP);
60	}
61
62	for (i = 0; i < NSYSCALL; i++) {
63		struct sysent *a = &actual[i];
64		systrace_sysent_t *s = &sysent[i];
65
66		if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
67			continue;
68
69		if (a->sy_callc == dtrace_systrace_syscall)
70			continue;
71
72#ifdef _SYSCALL32_IMPL
73		if (a->sy_callc == dtrace_systrace_syscall32)
74			continue;
75#endif
76
77		s->stsy_underlying = a->sy_callc;
78	}
79}
80
81/*ARGSUSED*/
82static void
83systrace_provide(void *arg, const dtrace_probedesc_t *desc)
84{
85	int i;
86
87	if (desc != NULL)
88		return;
89
90	systrace_init(sysent, &systrace_sysent);
91#ifdef _SYSCALL32_IMPL
92	systrace_init(sysent32, &systrace_sysent32);
93#endif
94
95	for (i = 0; i < NSYSCALL; i++) {
96		if (systrace_sysent[i].stsy_underlying == NULL)
97			continue;
98
99		if (dtrace_probe_lookup(systrace_id, NULL,
100		    syscallnames[i], "entry") != 0)
101			continue;
102
103		(void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
104		    "entry", SYSTRACE_ARTIFICIAL_FRAMES,
105		    (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
106		(void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
107		    "return", SYSTRACE_ARTIFICIAL_FRAMES,
108		    (void *)((uintptr_t)SYSTRACE_RETURN(i)));
109
110		systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
111		systrace_sysent[i].stsy_return = DTRACE_IDNONE;
112#ifdef _SYSCALL32_IMPL
113		systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
114		systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
115#endif
116	}
117}
118
119/*ARGSUSED*/
120static void
121systrace_destroy(void *arg, dtrace_id_t id, void *parg)
122{
123	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
124
125	/*
126	 * There's nothing to do here but assert that we have actually been
127	 * disabled.
128	 */
129	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
130		ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
131#ifdef _SYSCALL32_IMPL
132		ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
133#endif
134	} else {
135		ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
136#ifdef _SYSCALL32_IMPL
137		ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
138#endif
139	}
140}
141
142/*ARGSUSED*/
143static int
144systrace_enable(void *arg, dtrace_id_t id, void *parg)
145{
146	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
147	int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
148	    systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
149
150	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
151		systrace_sysent[sysnum].stsy_entry = id;
152#ifdef _SYSCALL32_IMPL
153		systrace_sysent32[sysnum].stsy_entry = id;
154#endif
155	} else {
156		systrace_sysent[sysnum].stsy_return = id;
157#ifdef _SYSCALL32_IMPL
158		systrace_sysent32[sysnum].stsy_return = id;
159#endif
160	}
161
162	if (enabled) {
163		ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
164		return (0);
165	}
166
167	(void) casptr(&sysent[sysnum].sy_callc,
168	    (void *)systrace_sysent[sysnum].stsy_underlying,
169	    (void *)dtrace_systrace_syscall);
170#ifdef _SYSCALL32_IMPL
171	(void) casptr(&sysent32[sysnum].sy_callc,
172	    (void *)systrace_sysent32[sysnum].stsy_underlying,
173	    (void *)dtrace_systrace_syscall32);
174#endif
175	return (0);
176}
177
178/*ARGSUSED*/
179static void
180systrace_disable(void *arg, dtrace_id_t id, void *parg)
181{
182	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
183	int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
184	    systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
185
186	if (disable) {
187		(void) casptr(&sysent[sysnum].sy_callc,
188		    (void *)dtrace_systrace_syscall,
189		    (void *)systrace_sysent[sysnum].stsy_underlying);
190
191#ifdef _SYSCALL32_IMPL
192		(void) casptr(&sysent32[sysnum].sy_callc,
193		    (void *)dtrace_systrace_syscall32,
194		    (void *)systrace_sysent32[sysnum].stsy_underlying);
195#endif
196	}
197
198	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
199		systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
200#ifdef _SYSCALL32_IMPL
201		systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
202#endif
203	} else {
204		systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
205#ifdef _SYSCALL32_IMPL
206		systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
207#endif
208	}
209}
210
/*
 * Stability attributes passed to dtrace_register() for the "syscall"
 * provider.  Each row is a (stability, stability, class) triple.
 * NOTE(review): the five rows presumably follow the member order of
 * dtrace_pattr_t (provider, module, function, name, arguments) -- confirm
 * against <sys/dtrace.h>.
 */
static dtrace_pattr_t systrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
218
/*
 * Provider operations vector passed to dtrace_register().  Only four slots
 * are implemented; every other slot is left NULL.  The initializer is
 * positional, so the order of the entries must not be changed.
 */
static dtrace_pops_t systrace_pops = {
	systrace_provide,	/* create the entry/return probes */
	NULL,
	systrace_enable,	/* record probe id, install interposer */
	systrace_disable,	/* restore handler, clear probe id */
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	systrace_destroy	/* only asserts the probe is disabled */
};
231
232static int
233systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
234{
235	switch (cmd) {
236	case DDI_ATTACH:
237		break;
238	case DDI_RESUME:
239		return (DDI_SUCCESS);
240	default:
241		return (DDI_FAILURE);
242	}
243
244	systrace_probe = (void (*)())dtrace_probe;
245	membar_enter();
246
247	if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
248	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
249	    dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
250	    &systrace_pops, NULL, &systrace_id) != 0) {
251		systrace_probe = systrace_stub;
252		ddi_remove_minor_node(devi, NULL);
253		return (DDI_FAILURE);
254	}
255
256	ddi_report_dev(devi);
257	systrace_devi = devi;
258
259	return (DDI_SUCCESS);
260}
261
262static int
263systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
264{
265	switch (cmd) {
266	case DDI_DETACH:
267		break;
268	case DDI_SUSPEND:
269		return (DDI_SUCCESS);
270	default:
271		return (DDI_FAILURE);
272	}
273
274	if (dtrace_unregister(systrace_id) != 0)
275		return (DDI_FAILURE);
276
277	ddi_remove_minor_node(devi, NULL);
278	systrace_probe = systrace_stub;
279	return (DDI_SUCCESS);
280}
281
282/*ARGSUSED*/
283static int
284systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
285{
286	int error;
287
288	switch (infocmd) {
289	case DDI_INFO_DEVT2DEVINFO:
290		*result = (void *)systrace_devi;
291		error = DDI_SUCCESS;
292		break;
293	case DDI_INFO_DEVT2INSTANCE:
294		*result = (void *)0;
295		error = DDI_SUCCESS;
296		break;
297	default:
298		error = DDI_FAILURE;
299	}
300	return (error);
301}
302
/*
 * Character device open entry point.  Performs no work and ignores all of
 * its arguments; every open succeeds by returning 0.
 */
/*ARGSUSED*/
static int
systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}
309
/*
 * Character/block entry points for the systrace pseudo-device.  open is the
 * only routine supplied by this file; the remaining slots are filled with
 * the generic nodev, nulldev, nochpoll and ddi_prop_op routines.  The
 * initializer is positional, so the order of the entries must be kept.
 */
static struct cb_ops systrace_cb_ops = {
	systrace_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
327
/*
 * Device operations for the systrace pseudo-driver.  getinfo, attach and
 * detach are implemented in this file, and systrace_cb_ops supplies the
 * character device entry points.  The initializer is positional, so the
 * order of the entries must be kept.
 */
static struct dev_ops systrace_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	systrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	systrace_attach,	/* attach */
	systrace_detach,	/* detach */
	nodev,			/* reset */
	&systrace_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,		/* quiesce */
};
342
/*
 * Module linkage information for the kernel.
 *
 * modldrv describes this module as a driver, names it "System Call Tracing"
 * and ties it to systrace_ops.  It is referenced from modlinkage, which is
 * what _init(), _info() and _fini() hand to the module framework.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"System Call Tracing",	/* name of module */
	&systrace_ops,		/* driver ops */
};
351
/*
 * Top-level linkage structure passed to mod_install(), mod_info() and
 * mod_remove().  It carries the revision MODREV_1 and a NULL-terminated
 * list whose only element is modldrv.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
357
358int
359_init(void)
360{
361	return (mod_install(&modlinkage));
362}
363
364int
365_info(struct modinfo *modinfop)
366{
367	return (mod_info(&modlinkage, modinfop));
368}
369
370int
371_fini(void)
372{
373	return (mod_remove(&modlinkage));
374}
375