1179237Sjb/*
2179237Sjb * CDDL HEADER START
3179237Sjb *
4179237Sjb * The contents of this file are subject to the terms of the
5179237Sjb * Common Development and Distribution License (the "License").
6179237Sjb * You may not use this file except in compliance with the License.
7179237Sjb *
8179237Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9179237Sjb * or http://www.opensolaris.org/os/licensing.
10179237Sjb * See the License for the specific language governing permissions
11179237Sjb * and limitations under the License.
12179237Sjb *
13179237Sjb * When distributing Covered Code, include this CDDL HEADER in each
14179237Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15179237Sjb * If applicable, add the following below this CDDL HEADER, with the
16179237Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17179237Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18179237Sjb *
19179237Sjb * CDDL HEADER END
20179237Sjb *
21179237Sjb * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22179237Sjb *
23179237Sjb * $FreeBSD$
24179237Sjb *
25179237Sjb */
26179237Sjb
27179237Sjb/*
28179237Sjb * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29179237Sjb * Use is subject to license terms.
30179237Sjb */
31179237Sjb
32179237Sjb#include <sys/cdefs.h>
33179237Sjb#include <sys/param.h>
34179237Sjb#include <sys/systm.h>
35179237Sjb#include <sys/conf.h>
36179237Sjb#include <sys/cpuvar.h>
37179237Sjb#include <sys/fcntl.h>
38179237Sjb#include <sys/filio.h>
39179237Sjb#include <sys/kdb.h>
40179237Sjb#include <sys/kernel.h>
41179237Sjb#include <sys/kmem.h>
42179237Sjb#include <sys/kthread.h>
43179237Sjb#include <sys/limits.h>
44179237Sjb#include <sys/linker.h>
45179237Sjb#include <sys/lock.h>
46179237Sjb#include <sys/malloc.h>
47179237Sjb#include <sys/module.h>
48179237Sjb#include <sys/mutex.h>
49179237Sjb#include <sys/poll.h>
50179237Sjb#include <sys/proc.h>
51179237Sjb#include <sys/selinfo.h>
52179237Sjb#include <sys/smp.h>
53184698Srodrigc#include <sys/sysproto.h>
54179237Sjb#include <sys/sysent.h>
55179237Sjb#include <sys/uio.h>
56179237Sjb#include <sys/unistd.h>
57179237Sjb#include <machine/stdarg.h>
58179237Sjb
59179237Sjb#include <sys/dtrace.h>
60179237Sjb
61184698Srodrigc#ifdef LINUX_SYSTRACE
62219561Savg#if defined(__amd64__)
63294368Sjhb#include <amd64/linux/linux.h>
64294368Sjhb#include <amd64/linux/linux_proto.h>
65294368Sjhb#include <amd64/linux/linux_syscalls.c>
66294368Sjhb#include <amd64/linux/linux_systrace_args.c>
67219561Savg#elif defined(__i386__)
68219561Savg#include <i386/linux/linux.h>
69219561Savg#include <i386/linux/linux_proto.h>
70219561Savg#include <i386/linux/linux_syscalls.c>
71219561Savg#include <i386/linux/linux_systrace_args.c>
72219561Savg#else
73219561Savg#error Only i386 and amd64 are supported.
74219561Savg#endif
75294368Sjhb#define	MODNAME		"linux"
76184698Srodrigcextern struct sysent linux_sysent[];
77184698Srodrigc#define	MAXSYSCALL	LINUX_SYS_MAXSYSCALL
78184698Srodrigc#define	SYSCALLNAMES	linux_syscallnames
79184698Srodrigc#define	SYSENT		linux_sysent
80294368Sjhb#elif defined(LINUX32_SYSTRACE)
81294368Sjhb#if defined(__amd64__)
82294368Sjhb#include <amd64/linux32/linux.h>
83294368Sjhb#include <amd64/linux32/linux32_proto.h>
84294368Sjhb#include <amd64/linux32/linux32_syscalls.c>
85294368Sjhb#include <amd64/linux32/linux32_systrace_args.c>
86294368Sjhb#else
87294368Sjhb#error Only amd64 is supported.
88294368Sjhb#endif
89294368Sjhb#define	MODNAME		"linux32"
90294368Sjhbextern struct sysent linux32_sysent[];
91294368Sjhb#define	MAXSYSCALL	LINUX32_SYS_MAXSYSCALL
92294368Sjhb#define	SYSCALLNAMES	linux32_syscallnames
93294368Sjhb#define	SYSENT		linux32_sysent
94219561Savg#elif defined(FREEBSD32_SYSTRACE)
95219561Savg/*
96219561Savg * The syscall arguments are processed into a DTrace argument array
97219561Savg * using a generated function. See sys/kern/makesyscalls.sh.
98219561Savg */
99219561Savg#include <compat/freebsd32/freebsd32_proto.h>
100219561Savg#include <compat/freebsd32/freebsd32_util.h>
101219561Savg#include <compat/freebsd32/freebsd32_syscall.h>
102219561Savg#include <compat/freebsd32/freebsd32_systrace_args.c>
103219561Savgextern const char *freebsd32_syscallnames[];
104219561Savg#define	MODNAME		"freebsd32"
105219561Savg#define	MAXSYSCALL	FREEBSD32_SYS_MAXSYSCALL
106219561Savg#define	SYSCALLNAMES	freebsd32_syscallnames
107219561Savg#define	SYSENT		freebsd32_sysent
108184698Srodrigc#else
109184698Srodrigc/*
110184698Srodrigc * The syscall arguments are processed into a DTrace argument array
111184698Srodrigc * using a generated function. See sys/kern/makesyscalls.sh.
112184698Srodrigc */
113184698Srodrigc#include <sys/syscall.h>
114184698Srodrigc#include <kern/systrace_args.c>
115219561Savg#define	MODNAME		"freebsd"
116184698Srodrigc#define	MAXSYSCALL	SYS_MAXSYSCALL
117184698Srodrigc#define	SYSCALLNAMES	syscallnames
118184698Srodrigc#define	SYSENT		sysent
119294368Sjhb#define	NATIVE_ABI
120184698Srodrigc#endif
121184698Srodrigc
/* Provider name and the device node path, the latter suffixed with the ABI. */
#define	PROVNAME	"syscall"
#define	DEVNAME		"dtrace/systrace/" MODNAME

#define	SYSTRACE_ARTIFICIAL_FRAMES	1

/*
 * Encoding of the per-probe private argument: the syscall number lives in
 * the low SYSTRACE_SHIFT bits.  SYSTRACE_ENTRY() additionally sets bit
 * SYSTRACE_SHIFT, SYSTRACE_RETURN() leaves the number untouched, and
 * SYSTRACE_ISENTRY() is non-zero only for values built by SYSTRACE_ENTRY().
 */
#define	SYSTRACE_SHIFT			16
#define	SYSTRACE_ISENTRY(x)		((int)(x) >> SYSTRACE_SHIFT)
#define	SYSTRACE_SYSNUM(x)		((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define	SYSTRACE_ENTRY(id)		((1 << SYSTRACE_SHIFT) | (id))
#define	SYSTRACE_RETURN(id)		(id)

/* Every syscall number must fit below the entry/return marker bit. */
#if ((1 << SYSTRACE_SHIFT) <= MAXSYSCALL)
#error 1 << SYSTRACE_SHIFT must exceed number of system calls
#endif
136179237Sjb
137179237Sjbstatic d_open_t	systrace_open;
138179237Sjbstatic int	systrace_unload(void);
139179237Sjbstatic void	systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
140179237Sjbstatic void	systrace_provide(void *, dtrace_probedesc_t *);
141179237Sjbstatic void	systrace_destroy(void *, dtrace_id_t, void *);
142179237Sjbstatic void	systrace_enable(void *, dtrace_id_t, void *);
143179237Sjbstatic void	systrace_disable(void *, dtrace_id_t, void *);
144179237Sjbstatic void	systrace_load(void *);
145179237Sjb
146179237Sjbstatic struct cdevsw systrace_cdevsw = {
147179237Sjb	.d_version	= D_VERSION,
148179237Sjb	.d_open		= systrace_open,
149294368Sjhb#ifndef NATIVE_ABI
150220437Sart	.d_name		= "systrace_" MODNAME,
151184698Srodrigc#else
152179237Sjb	.d_name		= "systrace",
153184698Srodrigc#endif
154179237Sjb};
155179237Sjb
/*
 * The syscall name table is stored through its const-qualified pointer type
 * and read back through pp_syscallnames as plain 'char **' when the names
 * are handed to the DTrace probe lookup/creation calls.
 */
static union {
	const char	**p_constnames;
	char		**pp_syscallnames;
} uglyhack = {
	.p_constnames = SYSCALLNAMES
};
160184698Srodrigc
161179237Sjbstatic dtrace_pattr_t systrace_attr = {
162179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
163179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
164179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
165179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
166179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
167179237Sjb};
168179237Sjb
169179237Sjbstatic dtrace_pops_t systrace_pops = {
170179237Sjb	systrace_provide,
171179237Sjb	NULL,
172179237Sjb	systrace_enable,
173179237Sjb	systrace_disable,
174179237Sjb	NULL,
175179237Sjb	NULL,
176179237Sjb	systrace_getargdesc,
177179237Sjb	NULL,
178179237Sjb	NULL,
179179237Sjb	systrace_destroy
180179237Sjb};
181179237Sjb
182179237Sjbstatic struct cdev		*systrace_cdev;
183179237Sjbstatic dtrace_provider_id_t	systrace_id;
184179237Sjb
185269272Smarkjtypedef void (*systrace_dtrace_probe_t)(dtrace_id_t, uintptr_t, uintptr_t,
186269272Smarkj    uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
187269272Smarkj
#ifdef NATIVE_ABI
/*
 * Probe callback function.
 *
 * Note: This function is called for _all_ syscalls, regardless of which sysent
 *       array the syscall comes from. It could be a standard syscall or a
 *       compat syscall from something like Linux.
 *
 * id      DTrace probe identifier to fire.
 * sysnum  System call number.
 * sysent  Table entry for the system call; consulted only when params is
 *         non-NULL.
 * params  Syscall argument structure for an 'entry' probe, or NULL for a
 *         'return' probe.
 * ret     Syscall return value; used only when params is NULL.
 */
static void
systrace_probe(u_int32_t id, int sysnum, struct sysent *sysent, void *params,
    int ret)
{
	systrace_dtrace_probe_t fire;
	u_int64_t	uargs[8];
	int		n_args = 0;

	memset(uargs, 0, sizeof(uargs));

	if (params == NULL) {
		/*
		 * No argument structure means this is a 'return' probe:
		 * arg0 and arg1 both carry the syscall's return value.
		 */
		uargs[0] = ret;
		uargs[1] = ret;
	} else if (sysent->sy_systrace_args_func != NULL) {
		/*
		 * The syscall has its own argument conversion function
		 * registered; let it fill in the argument array.
		 */
		(*sysent->sy_systrace_args_func)(sysnum, params, uargs,
		    &n_args);
	} else {
		/*
		 * Fall back to the built-in conversion function to turn the
		 * syscall structure fields into the array of 64-bit values
		 * that DTrace expects.
		 */
		systrace_args(sysnum, params, uargs, &n_args);
	}

	/* Fire the probe with the converted arguments. */
	fire = (systrace_dtrace_probe_t)dtrace_probe;
	fire(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4], uargs[5],
	    uargs[6], uargs[7]);
}

#endif /* NATIVE_ABI */
238179237Sjb
239179237Sjbstatic void
240179237Sjbsystrace_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc)
241179237Sjb{
242179237Sjb	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
243179237Sjb
244227441Srstone	if (SYSTRACE_ISENTRY((uintptr_t)parg))
245227441Srstone		systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
246227441Srstone		    desc->dtargd_native, sizeof(desc->dtargd_native));
247227441Srstone	else
248227441Srstone		systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
249227441Srstone		    desc->dtargd_native, sizeof(desc->dtargd_native));
250179237Sjb
251179237Sjb	if (desc->dtargd_native[0] == '\0')
252179237Sjb		desc->dtargd_ndx = DTRACE_ARGNONE;
253179237Sjb
254179237Sjb	return;
255179237Sjb}
256179237Sjb
257179237Sjbstatic void
258179237Sjbsystrace_provide(void *arg, dtrace_probedesc_t *desc)
259179237Sjb{
260179237Sjb	int i;
261179237Sjb
262179237Sjb	if (desc != NULL)
263179237Sjb		return;
264179237Sjb
265184698Srodrigc	for (i = 0; i < MAXSYSCALL; i++) {
266219561Savg		if (dtrace_probe_lookup(systrace_id, MODNAME,
267184698Srodrigc		    uglyhack.pp_syscallnames[i], "entry") != 0)
268179237Sjb			continue;
269179237Sjb
270219561Savg		(void) dtrace_probe_create(systrace_id, MODNAME, uglyhack.pp_syscallnames[i],
271179237Sjb		    "entry", SYSTRACE_ARTIFICIAL_FRAMES,
272179237Sjb		    (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
273219561Savg		(void) dtrace_probe_create(systrace_id, MODNAME, uglyhack.pp_syscallnames[i],
274179237Sjb		    "return", SYSTRACE_ARTIFICIAL_FRAMES,
275179237Sjb		    (void *)((uintptr_t)SYSTRACE_RETURN(i)));
276179237Sjb	}
277179237Sjb}
278179237Sjb
279179237Sjbstatic void
280179237Sjbsystrace_destroy(void *arg, dtrace_id_t id, void *parg)
281179237Sjb{
282179237Sjb#ifdef DEBUG
283179237Sjb	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
284179237Sjb
285179237Sjb	/*
286179237Sjb	 * There's nothing to do here but assert that we have actually been
287179237Sjb	 * disabled.
288179237Sjb	 */
289179237Sjb	if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
290179237Sjb		ASSERT(sysent[sysnum].sy_entry == 0);
291179237Sjb	} else {
292179237Sjb		ASSERT(sysent[sysnum].sy_return == 0);
293179237Sjb	}
294179237Sjb#endif
295179237Sjb}
296179237Sjb
297179237Sjbstatic void
298179237Sjbsystrace_enable(void *arg, dtrace_id_t id, void *parg)
299179237Sjb{
300179237Sjb	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
301179237Sjb
302184698Srodrigc	if (SYSENT[sysnum].sy_systrace_args_func == NULL)
303184698Srodrigc		SYSENT[sysnum].sy_systrace_args_func = systrace_args;
304184698Srodrigc
305179237Sjb	if (SYSTRACE_ISENTRY((uintptr_t)parg))
306184698Srodrigc		SYSENT[sysnum].sy_entry = id;
307179237Sjb	else
308184698Srodrigc		SYSENT[sysnum].sy_return = id;
309179237Sjb}
310179237Sjb
311179237Sjbstatic void
312179237Sjbsystrace_disable(void *arg, dtrace_id_t id, void *parg)
313179237Sjb{
314179237Sjb	int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
315179237Sjb
316184698Srodrigc	SYSENT[sysnum].sy_entry = 0;
317184698Srodrigc	SYSENT[sysnum].sy_return = 0;
318179237Sjb}
319179237Sjb
320179237Sjbstatic void
321179237Sjbsystrace_load(void *dummy)
322179237Sjb{
323179237Sjb	/* Create the /dev/dtrace/systrace entry. */
324179237Sjb	systrace_cdev = make_dev(&systrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
325184698Srodrigc	   DEVNAME);
326179237Sjb
327184698Srodrigc	if (dtrace_register(PROVNAME, &systrace_attr, DTRACE_PRIV_USER,
328179237Sjb	    NULL, &systrace_pops, NULL, &systrace_id) != 0)
329179237Sjb		return;
330179237Sjb
331294368Sjhb#ifdef NATIVE_ABI
332179237Sjb	systrace_probe_func = systrace_probe;
333184698Srodrigc#endif
334179237Sjb}
335179237Sjb
336179237Sjb
337179237Sjbstatic int
338179237Sjbsystrace_unload()
339179237Sjb{
340179237Sjb	int error = 0;
341179237Sjb
342179237Sjb	if ((error = dtrace_unregister(systrace_id)) != 0)
343179237Sjb		return (error);
344179237Sjb
345294368Sjhb#ifdef NATIVE_ABI
346179237Sjb	systrace_probe_func = NULL;
347184698Srodrigc#endif
348179237Sjb
349179237Sjb	destroy_dev(systrace_cdev);
350179237Sjb
351179237Sjb	return (error);
352179237Sjb}
353179237Sjb
354179237Sjbstatic int
355179237Sjbsystrace_modevent(module_t mod __unused, int type, void *data __unused)
356179237Sjb{
357179237Sjb	int error = 0;
358179237Sjb
359179237Sjb	switch (type) {
360179237Sjb	case MOD_LOAD:
361179237Sjb		break;
362179237Sjb
363179237Sjb	case MOD_UNLOAD:
364179237Sjb		break;
365179237Sjb
366179237Sjb	case MOD_SHUTDOWN:
367179237Sjb		break;
368179237Sjb
369179237Sjb	default:
370179237Sjb		error = EOPNOTSUPP;
371179237Sjb		break;
372179237Sjb
373179237Sjb	}
374179237Sjb	return (error);
375179237Sjb}
376179237Sjb
377179237Sjbstatic int
378179237Sjbsystrace_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
379179237Sjb{
380179237Sjb	return (0);
381179237Sjb}
382179237Sjb
383179237SjbSYSINIT(systrace_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, systrace_load, NULL);
384179237SjbSYSUNINIT(systrace_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, systrace_unload, NULL);
385179237Sjb
386184698Srodrigc#ifdef LINUX_SYSTRACE
387294368SjhbDEV_MODULE(systrace_linux, systrace_modevent, NULL);
388294368SjhbMODULE_VERSION(systrace_linux, 1);
389294368Sjhb#ifdef __amd64__
390294368SjhbMODULE_DEPEND(systrace_linux, linux64, 1, 1, 1);
391294368Sjhb#else
392294368SjhbMODULE_DEPEND(systrace_linux, linux, 1, 1, 1);
393294368Sjhb#endif
394294368SjhbMODULE_DEPEND(systrace_linux, dtrace, 1, 1, 1);
395294368SjhbMODULE_DEPEND(systrace_linux, opensolaris, 1, 1, 1);
396294368Sjhb#elif defined(LINUX32_SYSTRACE)
397219561SavgDEV_MODULE(systrace_linux32, systrace_modevent, NULL);
398219561SavgMODULE_VERSION(systrace_linux32, 1);
399219561SavgMODULE_DEPEND(systrace_linux32, linux, 1, 1, 1);
400219561SavgMODULE_DEPEND(systrace_linux32, dtrace, 1, 1, 1);
401219561SavgMODULE_DEPEND(systrace_linux32, opensolaris, 1, 1, 1);
402219561Savg#elif defined(FREEBSD32_SYSTRACE)
403219561SavgDEV_MODULE(systrace_freebsd32, systrace_modevent, NULL);
404219561SavgMODULE_VERSION(systrace_freebsd32, 1);
405219561SavgMODULE_DEPEND(systrace_freebsd32, dtrace, 1, 1, 1);
406219561SavgMODULE_DEPEND(systrace_freebsd32, opensolaris, 1, 1, 1);
407184698Srodrigc#else
408179237SjbDEV_MODULE(systrace, systrace_modevent, NULL);
409179237SjbMODULE_VERSION(systrace, 1);
410179237SjbMODULE_DEPEND(systrace, dtrace, 1, 1, 1);
411179237SjbMODULE_DEPEND(systrace, opensolaris, 1, 1, 1);
412184698Srodrigc#endif
413