1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/dtrace.h>
29#include <sys/systrace.h>
30#include <sys/stat.h>
31#include <sys/systm.h>
32#include <sys/conf.h>
33#include <sys/ddi.h>
34#include <sys/sunddi.h>
35#include <sys/atomic.h>
36
/*
 * Frame count passed to dtrace_probe_create() for every probe that this
 * provider creates.
 */
#define	SYSTRACE_ARTIFICIAL_FRAMES	1

/*
 * Encoding of the per-probe argument handed to dtrace_probe_create():
 * the low SYSTRACE_SHIFT bits hold the system call number, and the bit at
 * position SYSTRACE_SHIFT is set for an entry probe and clear for a return
 * probe.
 */
#define	SYSTRACE_SHIFT			16
#define	SYSTRACE_ENTRY_BIT		(1 << SYSTRACE_SHIFT)
#define	SYSTRACE_SYSNUM_MASK		(SYSTRACE_ENTRY_BIT - 1)

#define	SYSTRACE_ISENTRY(x)		((int)(x) >> SYSTRACE_SHIFT)
#define	SYSTRACE_SYSNUM(x)		((int)(x) & SYSTRACE_SYSNUM_MASK)
#define	SYSTRACE_ENTRY(id)		(SYSTRACE_ENTRY_BIT | (id))
#define	SYSTRACE_RETURN(id)		(id)

/*
 * The system call number must fit below the entry bit, otherwise the two
 * fields of the encoding would overlap.
 */
#if (SYSTRACE_ENTRY_BIT <= NSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif
48
/*
 * systrace_devi: dev_info node saved by systrace_attach() and handed back by
 * systrace_info() for DDI_INFO_DEVT2DEVINFO.
 * systrace_id: provider id filled in by dtrace_register() and used for all
 * later probe lookup/creation and for dtrace_unregister().
 */
static dev_info_t *systrace_devi;
static dtrace_provider_id_t systrace_id;
51
52static void
53systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
54{
55	systrace_sysent_t *sysent = *interposed;
56	int i;
57
58	if (sysent == NULL) {
59		*interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
60		    NSYSCALL, KM_SLEEP);
61	}
62
63	for (i = 0; i < NSYSCALL; i++) {
64		struct sysent *a = &actual[i];
65		systrace_sysent_t *s = &sysent[i];
66
67		if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
68			continue;
69
70		if (a->sy_callc == dtrace_systrace_syscall)
71			continue;
72
73#ifdef _SYSCALL32_IMPL
74		if (a->sy_callc == dtrace_systrace_syscall32)
75			continue;
76#endif
77
78		s->stsy_underlying = a->sy_callc;
79	}
80}
81
82/*ARGSUSED*/
83static void
84systrace_provide(void *arg, const dtrace_probedesc_t *desc)
85{
86	int i;
87
88	if (desc != NULL)
89		return;
90
91	systrace_init(sysent, &systrace_sysent);
92#ifdef _SYSCALL32_IMPL
93	systrace_init(sysent32, &systrace_sysent32);
94#endif
95
96	for (i = 0; i < NSYSCALL; i++) {
97		if (systrace_sysent[i].stsy_underlying == NULL)
98			continue;
99
100		if (dtrace_probe_lookup(systrace_id, NULL,
101		    syscallnames[i], "entry") != 0)
102			continue;
103
104		(void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
105		    "entry", SYSTRACE_ARTIFICIAL_FRAMES,
106		    (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
107		(void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
108		    "return", SYSTRACE_ARTIFICIAL_FRAMES,
109		    (void *)((uintptr_t)SYSTRACE_RETURN(i)));
110
111		systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
112		systrace_sysent[i].stsy_return = DTRACE_IDNONE;
113#ifdef _SYSCALL32_IMPL
114		systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
115		systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
116#endif
117	}
118}
119
120/*ARGSUSED*/
121static void
122systrace_destroy(void *arg, dtrace_id_t id, void *parg)
123{
124	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
125
126	/*
127	 * There's nothing to do here but assert that we have actually been
128	 * disabled.
129	 */
130	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
131		ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
132#ifdef _SYSCALL32_IMPL
133		ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
134#endif
135	} else {
136		ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
137#ifdef _SYSCALL32_IMPL
138		ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
139#endif
140	}
141}
142
143/*ARGSUSED*/
144static int
145systrace_enable(void *arg, dtrace_id_t id, void *parg)
146{
147	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
148	int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
149	    systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
150
151	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
152		systrace_sysent[sysnum].stsy_entry = id;
153#ifdef _SYSCALL32_IMPL
154		systrace_sysent32[sysnum].stsy_entry = id;
155#endif
156	} else {
157		systrace_sysent[sysnum].stsy_return = id;
158#ifdef _SYSCALL32_IMPL
159		systrace_sysent32[sysnum].stsy_return = id;
160#endif
161	}
162
163	if (enabled) {
164		ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
165		return (0);
166	}
167
168	(void) casptr(&sysent[sysnum].sy_callc,
169	    (void *)systrace_sysent[sysnum].stsy_underlying,
170	    (void *)dtrace_systrace_syscall);
171#ifdef _SYSCALL32_IMPL
172	(void) casptr(&sysent32[sysnum].sy_callc,
173	    (void *)systrace_sysent32[sysnum].stsy_underlying,
174	    (void *)dtrace_systrace_syscall32);
175#endif
176	return (0);
177}
178
179/*ARGSUSED*/
180static void
181systrace_disable(void *arg, dtrace_id_t id, void *parg)
182{
183	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
184	int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
185	    systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
186
187	if (disable) {
188		(void) casptr(&sysent[sysnum].sy_callc,
189		    (void *)dtrace_systrace_syscall,
190		    (void *)systrace_sysent[sysnum].stsy_underlying);
191
192#ifdef _SYSCALL32_IMPL
193		(void) casptr(&sysent32[sysnum].sy_callc,
194		    (void *)dtrace_systrace_syscall32,
195		    (void *)systrace_sysent32[sysnum].stsy_underlying);
196#endif
197	}
198
199	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
200		systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
201#ifdef _SYSCALL32_IMPL
202		systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
203#endif
204	} else {
205		systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
206#ifdef _SYSCALL32_IMPL
207		systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
208#endif
209	}
210}
211
/*
 * Attribute table passed to dtrace_register() for the "syscall" provider.
 * NOTE(review): each row is presumably a (name stability, data stability,
 * dependency class) triple, with rows ordered provider, module, function,
 * name, arguments -- confirm against dtrace_pattr_t in <sys/dtrace.h>.
 */
static dtrace_pattr_t systrace_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
219
/*
 * Provider operations vector passed to dtrace_register().  The initializer
 * is positional; this file supplies the provide, enable, disable and destroy
 * routines and leaves every other slot NULL.
 * NOTE(review): the meaning of each NULL slot is defined by dtrace_pops_t in
 * <sys/dtrace.h>, which is not visible here.
 */
static dtrace_pops_t systrace_pops = {
	systrace_provide,
	NULL,
	systrace_enable,
	systrace_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	systrace_destroy
};
232
233static int
234systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
235{
236	switch (cmd) {
237	case DDI_ATTACH:
238		break;
239	case DDI_RESUME:
240		return (DDI_SUCCESS);
241	default:
242		return (DDI_FAILURE);
243	}
244
245	systrace_probe = (void (*)())dtrace_probe;
246	membar_enter();
247
248	if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
249	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
250	    dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
251	    &systrace_pops, NULL, &systrace_id) != 0) {
252		systrace_probe = systrace_stub;
253		ddi_remove_minor_node(devi, NULL);
254		return (DDI_FAILURE);
255	}
256
257	ddi_report_dev(devi);
258	systrace_devi = devi;
259
260	return (DDI_SUCCESS);
261}
262
263static int
264systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
265{
266	switch (cmd) {
267	case DDI_DETACH:
268		break;
269	case DDI_SUSPEND:
270		return (DDI_SUCCESS);
271	default:
272		return (DDI_FAILURE);
273	}
274
275	if (dtrace_unregister(systrace_id) != 0)
276		return (DDI_FAILURE);
277
278	ddi_remove_minor_node(devi, NULL);
279	systrace_probe = systrace_stub;
280	return (DDI_SUCCESS);
281}
282
283/*ARGSUSED*/
284static int
285systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
286{
287	int error;
288
289	switch (infocmd) {
290	case DDI_INFO_DEVT2DEVINFO:
291		*result = (void *)systrace_devi;
292		error = DDI_SUCCESS;
293		break;
294	case DDI_INFO_DEVT2INSTANCE:
295		*result = (void *)0;
296		error = DDI_SUCCESS;
297		break;
298	default:
299		error = DDI_FAILURE;
300	}
301	return (error);
302}
303
/*
 * Open routine installed in systrace_cb_ops.  It ignores all of its
 * arguments and always returns 0.
 */
/*ARGSUSED*/
static int
systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}
310
/*
 * Character/block entry point table referenced from systrace_ops.  Only the
 * open slot is backed by a routine from this file (systrace_open); every
 * other slot is filled with nodev, nulldev, nochpoll or ddi_prop_op.
 */
static struct cb_ops systrace_cb_ops = {
	systrace_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
328
/*
 * Device operations table referenced from modldrv.  It wires up
 * systrace_info, systrace_attach and systrace_detach together with the
 * systrace_cb_ops entry points; the remaining slots use nulldev, nodev or
 * NULL.
 */
static struct dev_ops systrace_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	systrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	systrace_attach,	/* attach */
	systrace_detach,	/* detach */
	nodev,			/* reset */
	&systrace_cb_ops,	/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};
342
/*
 * Module linkage information for the kernel.  This driver-type linkage
 * record names the module and points at systrace_ops; it is in turn
 * referenced from modlinkage.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"System Call Tracing",	/* name of module */
	&systrace_ops,		/* driver ops */
};
351
/*
 * Linkage structure passed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini().  It carries a single linkage record
 * (modldrv) followed by a NULL terminator.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
357
358int
359_init(void)
360{
361	return (mod_install(&modlinkage));
362}
363
364int
365_info(struct modinfo *modinfop)
366{
367	return (mod_info(&modlinkage, modinfop));
368}
369
370int
371_fini(void)
372{
373	return (mod_remove(&modlinkage));
374}
375