1179237Sjb/* 2179237Sjb * CDDL HEADER START 3179237Sjb * 4179237Sjb * The contents of this file are subject to the terms of the 5179237Sjb * Common Development and Distribution License (the "License"). 6179237Sjb * You may not use this file except in compliance with the License. 7179237Sjb * 8179237Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9179237Sjb * or http://www.opensolaris.org/os/licensing. 10179237Sjb * See the License for the specific language governing permissions 11179237Sjb * and limitations under the License. 12179237Sjb * 13179237Sjb * When distributing Covered Code, include this CDDL HEADER in each 14179237Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15179237Sjb * If applicable, add the following below this CDDL HEADER, with the 16179237Sjb * fields enclosed by brackets "[]" replaced with your own identifying 17179237Sjb * information: Portions Copyright [yyyy] [name of copyright owner] 18179237Sjb * 19179237Sjb * CDDL HEADER END 20179237Sjb * 21179237Sjb * Portions Copyright 2006-2008 John Birrell jb@freebsd.org 22179237Sjb * 23179237Sjb * $FreeBSD$ 24179237Sjb * 25179237Sjb */ 26179237Sjb 27179237Sjb/* 28179237Sjb * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 29179237Sjb * Use is subject to license terms. 30179237Sjb */ 31179237Sjb 32179237Sjb#include <sys/cdefs.h> 33179237Sjb#include <sys/param.h> 34179237Sjb#include <sys/systm.h> 35179237Sjb#include <sys/conf.h> 36179237Sjb#include <sys/cpuvar.h> 37179237Sjb#include <sys/fcntl.h> 38179237Sjb#include <sys/filio.h> 39179237Sjb#include <sys/kdb.h> 40179237Sjb#include <sys/kernel.h> 41179237Sjb#include <sys/kmem.h> 42179237Sjb#include <sys/kthread.h> 43179237Sjb#include <sys/limits.h> 44179237Sjb#include <sys/linker.h> 45179237Sjb#include <sys/lock.h> 46179237Sjb#include <sys/malloc.h> 47179237Sjb#include <sys/module.h> 48179237Sjb#include <sys/mutex.h> 49179237Sjb#include <sys/poll.h> 50179237Sjb#include <sys/proc.h> 51179237Sjb#include <sys/selinfo.h> 52179237Sjb#include <sys/smp.h> 53184698Srodrigc#include <sys/sysproto.h> 54179237Sjb#include <sys/sysent.h> 55179237Sjb#include <sys/uio.h> 56179237Sjb#include <sys/unistd.h> 57179237Sjb#include <machine/stdarg.h> 58179237Sjb 59179237Sjb#include <sys/dtrace.h> 60179237Sjb 61184698Srodrigc#ifdef LINUX_SYSTRACE 62219561Savg#if defined(__amd64__) 63294368Sjhb#include <amd64/linux/linux.h> 64294368Sjhb#include <amd64/linux/linux_proto.h> 65294368Sjhb#include <amd64/linux/linux_syscalls.c> 66294368Sjhb#include <amd64/linux/linux_systrace_args.c> 67219561Savg#elif defined(__i386__) 68219561Savg#include <i386/linux/linux.h> 69219561Savg#include <i386/linux/linux_proto.h> 70219561Savg#include <i386/linux/linux_syscalls.c> 71219561Savg#include <i386/linux/linux_systrace_args.c> 72219561Savg#else 73219561Savg#error Only i386 and amd64 are supported. 74219561Savg#endif 75294368Sjhb#define MODNAME "linux" 76184698Srodrigcextern struct sysent linux_sysent[]; 77184698Srodrigc#define MAXSYSCALL LINUX_SYS_MAXSYSCALL 78184698Srodrigc#define SYSCALLNAMES linux_syscallnames 79184698Srodrigc#define SYSENT linux_sysent 80294368Sjhb#elif defined(LINUX32_SYSTRACE) 81294368Sjhb#if defined(__amd64__) 82294368Sjhb#include <amd64/linux32/linux.h> 83294368Sjhb#include <amd64/linux32/linux32_proto.h> 84294368Sjhb#include <amd64/linux32/linux32_syscalls.c> 85294368Sjhb#include <amd64/linux32/linux32_systrace_args.c> 86294368Sjhb#else 87294368Sjhb#error Only amd64 is supported. 88294368Sjhb#endif 89294368Sjhb#define MODNAME "linux32" 90294368Sjhbextern struct sysent linux32_sysent[]; 91294368Sjhb#define MAXSYSCALL LINUX32_SYS_MAXSYSCALL 92294368Sjhb#define SYSCALLNAMES linux32_syscallnames 93294368Sjhb#define SYSENT linux32_sysent 94219561Savg#elif defined(FREEBSD32_SYSTRACE) 95219561Savg/* 96219561Savg * The syscall arguments are processed into a DTrace argument array 97219561Savg * using a generated function. See sys/kern/makesyscalls.sh. 98219561Savg */ 99219561Savg#include <compat/freebsd32/freebsd32_proto.h> 100219561Savg#include <compat/freebsd32/freebsd32_util.h> 101219561Savg#include <compat/freebsd32/freebsd32_syscall.h> 102219561Savg#include <compat/freebsd32/freebsd32_systrace_args.c> 103219561Savgextern const char *freebsd32_syscallnames[]; 104219561Savg#define MODNAME "freebsd32" 105219561Savg#define MAXSYSCALL FREEBSD32_SYS_MAXSYSCALL 106219561Savg#define SYSCALLNAMES freebsd32_syscallnames 107219561Savg#define SYSENT freebsd32_sysent 108184698Srodrigc#else 109184698Srodrigc/* 110184698Srodrigc * The syscall arguments are processed into a DTrace argument array 111184698Srodrigc * using a generated function. See sys/kern/makesyscalls.sh. 112184698Srodrigc */ 113184698Srodrigc#include <sys/syscall.h> 114184698Srodrigc#include <kern/systrace_args.c> 115219561Savg#define MODNAME "freebsd" 116184698Srodrigc#define MAXSYSCALL SYS_MAXSYSCALL 117184698Srodrigc#define SYSCALLNAMES syscallnames 118184698Srodrigc#define SYSENT sysent 119294368Sjhb#define NATIVE_ABI 120184698Srodrigc#endif 121184698Srodrigc 122219561Savg#define PROVNAME "syscall" 123219561Savg#define DEVNAME "dtrace/systrace/" MODNAME 124219561Savg 125179237Sjb#define SYSTRACE_ARTIFICIAL_FRAMES 1 126179237Sjb 127179237Sjb#define SYSTRACE_SHIFT 16 128179237Sjb#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) 129179237Sjb#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1)) 130179237Sjb#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id)) 131179237Sjb#define SYSTRACE_RETURN(id) (id) 132179237Sjb 133184698Srodrigc#if ((1 << SYSTRACE_SHIFT) <= MAXSYSCALL) 134179237Sjb#error 1 << SYSTRACE_SHIFT must exceed number of system calls 135179237Sjb#endif 136179237Sjb 137179237Sjbstatic d_open_t systrace_open; 138179237Sjbstatic int systrace_unload(void); 139179237Sjbstatic void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); 140179237Sjbstatic void systrace_provide(void *, dtrace_probedesc_t *); 141179237Sjbstatic void systrace_destroy(void *, dtrace_id_t, void *); 142179237Sjbstatic void systrace_enable(void *, dtrace_id_t, void *); 143179237Sjbstatic void systrace_disable(void *, dtrace_id_t, void *); 144179237Sjbstatic void systrace_load(void *); 145179237Sjb 146179237Sjbstatic struct cdevsw systrace_cdevsw = { 147179237Sjb .d_version = D_VERSION, 148179237Sjb .d_open = systrace_open, 149294368Sjhb#ifndef NATIVE_ABI 150220437Sart .d_name = "systrace_" MODNAME, 151184698Srodrigc#else 152179237Sjb .d_name = "systrace", 153184698Srodrigc#endif 154179237Sjb}; 155179237Sjb 156184698Srodrigcstatic union { 157184698Srodrigc const char **p_constnames; 158184698Srodrigc char **pp_syscallnames; 159184698Srodrigc} uglyhack = { SYSCALLNAMES }; 160184698Srodrigc 161179237Sjbstatic dtrace_pattr_t systrace_attr = { 162179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 163179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 164179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 165179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 166179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 167179237Sjb}; 168179237Sjb 169179237Sjbstatic dtrace_pops_t systrace_pops = { 170179237Sjb systrace_provide, 171179237Sjb NULL, 172179237Sjb systrace_enable, 173179237Sjb systrace_disable, 174179237Sjb NULL, 175179237Sjb NULL, 176179237Sjb systrace_getargdesc, 177179237Sjb NULL, 178179237Sjb NULL, 179179237Sjb systrace_destroy 180179237Sjb}; 181179237Sjb 182179237Sjbstatic struct cdev *systrace_cdev; 183179237Sjbstatic dtrace_provider_id_t systrace_id; 184179237Sjb 185269272Smarkjtypedef void (*systrace_dtrace_probe_t)(dtrace_id_t, uintptr_t, uintptr_t, 186269272Smarkj uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); 187269272Smarkj 188294368Sjhb#ifdef NATIVE_ABI 189179237Sjb/* 190184698Srodrigc * Probe callback function. 191184698Srodrigc * 192184698Srodrigc * Note: This function is called for _all_ syscalls, regardless of which sysent 193184698Srodrigc * array the syscall comes from. It could be a standard syscall or a 194184698Srodrigc * compat syscall from something like Linux. 195179237Sjb */ 196179237Sjbstatic void 197211608Srpaulosystrace_probe(u_int32_t id, int sysnum, struct sysent *sysent, void *params, 198211608Srpaulo int ret) 199179237Sjb{ 200269272Smarkj systrace_dtrace_probe_t probe; 201179237Sjb int n_args = 0; 202179237Sjb u_int64_t uargs[8]; 203179237Sjb 204211608Srpaulo memset(uargs, 0, sizeof(uargs)); 205179237Sjb /* 206184698Srodrigc * Check if this syscall has an argument conversion function 207184698Srodrigc * registered. 208179237Sjb */ 209211608Srpaulo if (params && sysent->sy_systrace_args_func != NULL) { 210179237Sjb /* 211179237Sjb * Convert the syscall parameters using the registered 212179237Sjb * function. 213179237Sjb */ 214184698Srodrigc (*sysent->sy_systrace_args_func)(sysnum, params, uargs, &n_args); 215211608Srpaulo } else if (params) { 216179237Sjb /* 217179237Sjb * Use the built-in system call argument conversion 218179237Sjb * function to translate the syscall structure fields 219184698Srodrigc * into the array of 64-bit values that DTrace 220179237Sjb * expects. 221179237Sjb */ 222179237Sjb systrace_args(sysnum, params, uargs, &n_args); 223211608Srpaulo } else { 224211608Srpaulo /* 225211608Srpaulo * Since params is NULL, this is a 'return' probe. 226211608Srpaulo * Set arg0 and arg1 as the return value of this syscall. 227211608Srpaulo */ 228211608Srpaulo uargs[0] = uargs[1] = ret; 229211608Srpaulo } 230179237Sjb 231179237Sjb /* Process the probe using the converted argments. */ 232269272Smarkj probe = (systrace_dtrace_probe_t)dtrace_probe; 233269272Smarkj probe(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4], uargs[5], 234269272Smarkj uargs[6], uargs[7]); 235179237Sjb} 236227441Srstone 237184698Srodrigc#endif 238179237Sjb 239179237Sjbstatic void 240179237Sjbsystrace_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) 241179237Sjb{ 242179237Sjb int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 243179237Sjb 244227441Srstone if (SYSTRACE_ISENTRY((uintptr_t)parg)) 245227441Srstone systrace_entry_setargdesc(sysnum, desc->dtargd_ndx, 246227441Srstone desc->dtargd_native, sizeof(desc->dtargd_native)); 247227441Srstone else 248227441Srstone systrace_return_setargdesc(sysnum, desc->dtargd_ndx, 249227441Srstone desc->dtargd_native, sizeof(desc->dtargd_native)); 250179237Sjb 251179237Sjb if (desc->dtargd_native[0] == '\0') 252179237Sjb desc->dtargd_ndx = DTRACE_ARGNONE; 253179237Sjb 254179237Sjb return; 255179237Sjb} 256179237Sjb 257179237Sjbstatic void 258179237Sjbsystrace_provide(void *arg, dtrace_probedesc_t *desc) 259179237Sjb{ 260179237Sjb int i; 261179237Sjb 262179237Sjb if (desc != NULL) 263179237Sjb return; 264179237Sjb 265184698Srodrigc for (i = 0; i < MAXSYSCALL; i++) { 266219561Savg if (dtrace_probe_lookup(systrace_id, MODNAME, 267184698Srodrigc uglyhack.pp_syscallnames[i], "entry") != 0) 268179237Sjb continue; 269179237Sjb 270219561Savg (void) dtrace_probe_create(systrace_id, MODNAME, uglyhack.pp_syscallnames[i], 271179237Sjb "entry", SYSTRACE_ARTIFICIAL_FRAMES, 272179237Sjb (void *)((uintptr_t)SYSTRACE_ENTRY(i))); 273219561Savg (void) dtrace_probe_create(systrace_id, MODNAME, uglyhack.pp_syscallnames[i], 274179237Sjb "return", SYSTRACE_ARTIFICIAL_FRAMES, 275179237Sjb (void *)((uintptr_t)SYSTRACE_RETURN(i))); 276179237Sjb } 277179237Sjb} 278179237Sjb 279179237Sjbstatic void 280179237Sjbsystrace_destroy(void *arg, dtrace_id_t id, void *parg) 281179237Sjb{ 282179237Sjb#ifdef DEBUG 283179237Sjb int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 284179237Sjb 285179237Sjb /* 286179237Sjb * There's nothing to do here but assert that we have actually been 287179237Sjb * disabled. 288179237Sjb */ 289179237Sjb if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 290179237Sjb ASSERT(sysent[sysnum].sy_entry == 0); 291179237Sjb } else { 292179237Sjb ASSERT(sysent[sysnum].sy_return == 0); 293179237Sjb } 294179237Sjb#endif 295179237Sjb} 296179237Sjb 297179237Sjbstatic void 298179237Sjbsystrace_enable(void *arg, dtrace_id_t id, void *parg) 299179237Sjb{ 300179237Sjb int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 301179237Sjb 302184698Srodrigc if (SYSENT[sysnum].sy_systrace_args_func == NULL) 303184698Srodrigc SYSENT[sysnum].sy_systrace_args_func = systrace_args; 304184698Srodrigc 305179237Sjb if (SYSTRACE_ISENTRY((uintptr_t)parg)) 306184698Srodrigc SYSENT[sysnum].sy_entry = id; 307179237Sjb else 308184698Srodrigc SYSENT[sysnum].sy_return = id; 309179237Sjb} 310179237Sjb 311179237Sjbstatic void 312179237Sjbsystrace_disable(void *arg, dtrace_id_t id, void *parg) 313179237Sjb{ 314179237Sjb int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 315179237Sjb 316184698Srodrigc SYSENT[sysnum].sy_entry = 0; 317184698Srodrigc SYSENT[sysnum].sy_return = 0; 318179237Sjb} 319179237Sjb 320179237Sjbstatic void 321179237Sjbsystrace_load(void *dummy) 322179237Sjb{ 323179237Sjb /* Create the /dev/dtrace/systrace entry. */ 324179237Sjb systrace_cdev = make_dev(&systrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 325184698Srodrigc DEVNAME); 326179237Sjb 327184698Srodrigc if (dtrace_register(PROVNAME, &systrace_attr, DTRACE_PRIV_USER, 328179237Sjb NULL, &systrace_pops, NULL, &systrace_id) != 0) 329179237Sjb return; 330179237Sjb 331294368Sjhb#ifdef NATIVE_ABI 332179237Sjb systrace_probe_func = systrace_probe; 333184698Srodrigc#endif 334179237Sjb} 335179237Sjb 336179237Sjb 337179237Sjbstatic int 338179237Sjbsystrace_unload() 339179237Sjb{ 340179237Sjb int error = 0; 341179237Sjb 342179237Sjb if ((error = dtrace_unregister(systrace_id)) != 0) 343179237Sjb return (error); 344179237Sjb 345294368Sjhb#ifdef NATIVE_ABI 346179237Sjb systrace_probe_func = NULL; 347184698Srodrigc#endif 348179237Sjb 349179237Sjb destroy_dev(systrace_cdev); 350179237Sjb 351179237Sjb return (error); 352179237Sjb} 353179237Sjb 354179237Sjbstatic int 355179237Sjbsystrace_modevent(module_t mod __unused, int type, void *data __unused) 356179237Sjb{ 357179237Sjb int error = 0; 358179237Sjb 359179237Sjb switch (type) { 360179237Sjb case MOD_LOAD: 361179237Sjb break; 362179237Sjb 363179237Sjb case MOD_UNLOAD: 364179237Sjb break; 365179237Sjb 366179237Sjb case MOD_SHUTDOWN: 367179237Sjb break; 368179237Sjb 369179237Sjb default: 370179237Sjb error = EOPNOTSUPP; 371179237Sjb break; 372179237Sjb 373179237Sjb } 374179237Sjb return (error); 375179237Sjb} 376179237Sjb 377179237Sjbstatic int 378179237Sjbsystrace_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) 379179237Sjb{ 380179237Sjb return (0); 381179237Sjb} 382179237Sjb 383179237SjbSYSINIT(systrace_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, systrace_load, NULL); 384179237SjbSYSUNINIT(systrace_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, systrace_unload, NULL); 385179237Sjb 386184698Srodrigc#ifdef LINUX_SYSTRACE 387294368SjhbDEV_MODULE(systrace_linux, systrace_modevent, NULL); 388294368SjhbMODULE_VERSION(systrace_linux, 1); 389294368Sjhb#ifdef __amd64__ 390294368SjhbMODULE_DEPEND(systrace_linux, linux64, 1, 1, 1); 391294368Sjhb#else 392294368SjhbMODULE_DEPEND(systrace_linux, linux, 1, 1, 1); 393294368Sjhb#endif 394294368SjhbMODULE_DEPEND(systrace_linux, dtrace, 1, 1, 1); 395294368SjhbMODULE_DEPEND(systrace_linux, opensolaris, 1, 1, 1); 396294368Sjhb#elif defined(LINUX32_SYSTRACE) 397219561SavgDEV_MODULE(systrace_linux32, systrace_modevent, NULL); 398219561SavgMODULE_VERSION(systrace_linux32, 1); 399219561SavgMODULE_DEPEND(systrace_linux32, linux, 1, 1, 1); 400219561SavgMODULE_DEPEND(systrace_linux32, dtrace, 1, 1, 1); 401219561SavgMODULE_DEPEND(systrace_linux32, opensolaris, 1, 1, 1); 402219561Savg#elif defined(FREEBSD32_SYSTRACE) 403219561SavgDEV_MODULE(systrace_freebsd32, systrace_modevent, NULL); 404219561SavgMODULE_VERSION(systrace_freebsd32, 1); 405219561SavgMODULE_DEPEND(systrace_freebsd32, dtrace, 1, 1, 1); 406219561SavgMODULE_DEPEND(systrace_freebsd32, opensolaris, 1, 1, 1); 407184698Srodrigc#else 408179237SjbDEV_MODULE(systrace, systrace_modevent, NULL); 409179237SjbMODULE_VERSION(systrace, 1); 410179237SjbMODULE_DEPEND(systrace, dtrace, 1, 1, 1); 411179237SjbMODULE_DEPEND(systrace, opensolaris, 1, 1, 1); 412184698Srodrigc#endif 413