systrace.c revision 227441
1179237Sjb/* 2179237Sjb * CDDL HEADER START 3179237Sjb * 4179237Sjb * The contents of this file are subject to the terms of the 5179237Sjb * Common Development and Distribution License (the "License"). 6179237Sjb * You may not use this file except in compliance with the License. 7179237Sjb * 8179237Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9179237Sjb * or http://www.opensolaris.org/os/licensing. 10179237Sjb * See the License for the specific language governing permissions 11179237Sjb * and limitations under the License. 12179237Sjb * 13179237Sjb * When distributing Covered Code, include this CDDL HEADER in each 14179237Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15179237Sjb * If applicable, add the following below this CDDL HEADER, with the 16179237Sjb * fields enclosed by brackets "[]" replaced with your own identifying 17179237Sjb * information: Portions Copyright [yyyy] [name of copyright owner] 18179237Sjb * 19179237Sjb * CDDL HEADER END 20179237Sjb * 21179237Sjb * Portions Copyright 2006-2008 John Birrell jb@freebsd.org 22179237Sjb * 23179237Sjb * $FreeBSD: head/sys/cddl/dev/systrace/systrace.c 227441 2011-11-11 03:49:42Z rstone $ 24179237Sjb * 25179237Sjb */ 26179237Sjb 27179237Sjb/* 28179237Sjb * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 29179237Sjb * Use is subject to license terms. 30179237Sjb */ 31179237Sjb 32179237Sjb#include <sys/cdefs.h> 33179237Sjb#include <sys/param.h> 34179237Sjb#include <sys/systm.h> 35179237Sjb#include <sys/conf.h> 36179237Sjb#include <sys/cpuvar.h> 37179237Sjb#include <sys/fcntl.h> 38179237Sjb#include <sys/filio.h> 39179237Sjb#include <sys/kdb.h> 40179237Sjb#include <sys/kernel.h> 41179237Sjb#include <sys/kmem.h> 42179237Sjb#include <sys/kthread.h> 43179237Sjb#include <sys/limits.h> 44179237Sjb#include <sys/linker.h> 45179237Sjb#include <sys/lock.h> 46179237Sjb#include <sys/malloc.h> 47179237Sjb#include <sys/module.h> 48179237Sjb#include <sys/mutex.h> 49179237Sjb#include <sys/poll.h> 50179237Sjb#include <sys/proc.h> 51179237Sjb#include <sys/selinfo.h> 52179237Sjb#include <sys/smp.h> 53184698Srodrigc#include <sys/sysproto.h> 54179237Sjb#include <sys/sysent.h> 55179237Sjb#include <sys/uio.h> 56179237Sjb#include <sys/unistd.h> 57179237Sjb#include <machine/stdarg.h> 58179237Sjb 59179237Sjb#include <sys/dtrace.h> 60179237Sjb 61184698Srodrigc#ifdef LINUX_SYSTRACE 62219561Savg#if defined(__amd64__) 63219561Savg#include <amd64/linux32/linux.h> 64219561Savg#include <amd64/linux32/linux32_proto.h> 65219561Savg#include <amd64/linux32/linux32_syscalls.c> 66219561Savg#include <amd64/linux32/linux32_systrace_args.c> 67220437Sart#define MODNAME "linux32" 68219561Savg#elif defined(__i386__) 69219561Savg#include <i386/linux/linux.h> 70219561Savg#include <i386/linux/linux_proto.h> 71219561Savg#include <i386/linux/linux_syscalls.c> 72219561Savg#include <i386/linux/linux_systrace_args.c> 73220437Sart#define MODNAME "linux" 74219561Savg#else 75219561Savg#error Only i386 and amd64 are supported. 76219561Savg#endif 77184698Srodrigcextern struct sysent linux_sysent[]; 78184698Srodrigc#define MAXSYSCALL LINUX_SYS_MAXSYSCALL 79184698Srodrigc#define SYSCALLNAMES linux_syscallnames 80184698Srodrigc#define SYSENT linux_sysent 81219561Savg#elif defined(FREEBSD32_SYSTRACE) 82219561Savg/* 83219561Savg * The syscall arguments are processed into a DTrace argument array 84219561Savg * using a generated function. See sys/kern/makesyscalls.sh. 85219561Savg */ 86219561Savg#include <compat/freebsd32/freebsd32_proto.h> 87219561Savg#include <compat/freebsd32/freebsd32_util.h> 88219561Savg#include <compat/freebsd32/freebsd32_syscall.h> 89219561Savg#include <compat/freebsd32/freebsd32_systrace_args.c> 90219561Savgextern const char *freebsd32_syscallnames[]; 91219561Savg#define MODNAME "freebsd32" 92219561Savg#define MAXSYSCALL FREEBSD32_SYS_MAXSYSCALL 93219561Savg#define SYSCALLNAMES freebsd32_syscallnames 94219561Savg#define SYSENT freebsd32_sysent 95184698Srodrigc#else 96184698Srodrigc/* 97184698Srodrigc * The syscall arguments are processed into a DTrace argument array 98184698Srodrigc * using a generated function. See sys/kern/makesyscalls.sh. 99184698Srodrigc */ 100184698Srodrigc#include <sys/syscall.h> 101184698Srodrigc#include <kern/systrace_args.c> 102219561Savg#define MODNAME "freebsd" 103184698Srodrigc#define MAXSYSCALL SYS_MAXSYSCALL 104184698Srodrigc#define SYSCALLNAMES syscallnames 105184698Srodrigc#define SYSENT sysent 106184698Srodrigc#endif 107184698Srodrigc 108219561Savg#define PROVNAME "syscall" 109219561Savg#define DEVNAME "dtrace/systrace/" MODNAME 110219561Savg 111179237Sjb#define SYSTRACE_ARTIFICIAL_FRAMES 1 112179237Sjb 113179237Sjb#define SYSTRACE_SHIFT 16 114179237Sjb#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) 115179237Sjb#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1)) 116179237Sjb#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id)) 117179237Sjb#define SYSTRACE_RETURN(id) (id) 118179237Sjb 119184698Srodrigc#if ((1 << SYSTRACE_SHIFT) <= MAXSYSCALL) 120179237Sjb#error 1 << SYSTRACE_SHIFT must exceed number of system calls 121179237Sjb#endif 122179237Sjb 123179237Sjbstatic d_open_t systrace_open; 124179237Sjbstatic int systrace_unload(void); 125179237Sjbstatic void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); 126179237Sjbstatic void systrace_provide(void *, dtrace_probedesc_t *); 127179237Sjbstatic void systrace_destroy(void *, dtrace_id_t, void *); 128179237Sjbstatic void systrace_enable(void *, dtrace_id_t, void *); 129179237Sjbstatic void systrace_disable(void *, dtrace_id_t, void *); 130179237Sjbstatic void systrace_load(void *); 131179237Sjb 132179237Sjbstatic struct cdevsw systrace_cdevsw = { 133179237Sjb .d_version = D_VERSION, 134179237Sjb .d_open = systrace_open, 135184698Srodrigc#ifdef LINUX_SYSTRACE 136220437Sart .d_name = "systrace_" MODNAME, 137184698Srodrigc#else 138179237Sjb .d_name = "systrace", 139184698Srodrigc#endif 140179237Sjb}; 141179237Sjb 142184698Srodrigcstatic union { 143184698Srodrigc const char **p_constnames; 144184698Srodrigc char **pp_syscallnames; 145184698Srodrigc} uglyhack = { SYSCALLNAMES }; 146184698Srodrigc 147179237Sjbstatic dtrace_pattr_t systrace_attr = { 148179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 149179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 150179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 151179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 152179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 153179237Sjb}; 154179237Sjb 155179237Sjbstatic dtrace_pops_t systrace_pops = { 156179237Sjb systrace_provide, 157179237Sjb NULL, 158179237Sjb systrace_enable, 159179237Sjb systrace_disable, 160179237Sjb NULL, 161179237Sjb NULL, 162179237Sjb systrace_getargdesc, 163179237Sjb NULL, 164179237Sjb NULL, 165179237Sjb systrace_destroy 166179237Sjb}; 167179237Sjb 168179237Sjbstatic struct cdev *systrace_cdev; 169179237Sjbstatic dtrace_provider_id_t systrace_id; 170179237Sjb 171184698Srodrigc#if !defined(LINUX_SYSTRACE) 172179237Sjb/* 173184698Srodrigc * Probe callback function. 174184698Srodrigc * 175184698Srodrigc * Note: This function is called for _all_ syscalls, regardless of which sysent 176184698Srodrigc * array the syscall comes from. It could be a standard syscall or a 177184698Srodrigc * compat syscall from something like Linux. 178179237Sjb */ 179179237Sjbstatic void 180211608Srpaulosystrace_probe(u_int32_t id, int sysnum, struct sysent *sysent, void *params, 181211608Srpaulo int ret) 182179237Sjb{ 183179237Sjb int n_args = 0; 184179237Sjb u_int64_t uargs[8]; 185179237Sjb 186211608Srpaulo memset(uargs, 0, sizeof(uargs)); 187179237Sjb /* 188184698Srodrigc * Check if this syscall has an argument conversion function 189184698Srodrigc * registered. 190179237Sjb */ 191211608Srpaulo if (params && sysent->sy_systrace_args_func != NULL) { 192179237Sjb /* 193179237Sjb * Convert the syscall parameters using the registered 194179237Sjb * function. 195179237Sjb */ 196184698Srodrigc (*sysent->sy_systrace_args_func)(sysnum, params, uargs, &n_args); 197211608Srpaulo } else if (params) { 198179237Sjb /* 199179237Sjb * Use the built-in system call argument conversion 200179237Sjb * function to translate the syscall structure fields 201184698Srodrigc * into the array of 64-bit values that DTrace 202179237Sjb * expects. 203179237Sjb */ 204179237Sjb systrace_args(sysnum, params, uargs, &n_args); 205211608Srpaulo } else { 206211608Srpaulo /* 207211608Srpaulo * Since params is NULL, this is a 'return' probe. 208211608Srpaulo * Set arg0 and arg1 as the return value of this syscall. 209211608Srpaulo */ 210211608Srpaulo uargs[0] = uargs[1] = ret; 211211608Srpaulo } 212179237Sjb 213179237Sjb /* Process the probe using the converted argments. */ 214179237Sjb dtrace_probe(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]); 215179237Sjb} 216227441Srstone 217184698Srodrigc#endif 218179237Sjb 219179237Sjbstatic void 220179237Sjbsystrace_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) 221179237Sjb{ 222179237Sjb int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 223179237Sjb 224227441Srstone if (SYSTRACE_ISENTRY((uintptr_t)parg)) 225227441Srstone systrace_entry_setargdesc(sysnum, desc->dtargd_ndx, 226227441Srstone desc->dtargd_native, sizeof(desc->dtargd_native)); 227227441Srstone else 228227441Srstone systrace_return_setargdesc(sysnum, desc->dtargd_ndx, 229227441Srstone desc->dtargd_native, sizeof(desc->dtargd_native)); 230179237Sjb 231179237Sjb if (desc->dtargd_native[0] == '\0') 232179237Sjb desc->dtargd_ndx = DTRACE_ARGNONE; 233179237Sjb 234179237Sjb return; 235179237Sjb} 236179237Sjb 237179237Sjbstatic void 238179237Sjbsystrace_provide(void *arg, dtrace_probedesc_t *desc) 239179237Sjb{ 240179237Sjb int i; 241179237Sjb 242179237Sjb if (desc != NULL) 243179237Sjb return; 244179237Sjb 245184698Srodrigc for (i = 0; i < MAXSYSCALL; i++) { 246219561Savg if (dtrace_probe_lookup(systrace_id, MODNAME, 247184698Srodrigc uglyhack.pp_syscallnames[i], "entry") != 0) 248179237Sjb continue; 249179237Sjb 250219561Savg (void) dtrace_probe_create(systrace_id, MODNAME, uglyhack.pp_syscallnames[i], 251179237Sjb "entry", SYSTRACE_ARTIFICIAL_FRAMES, 252179237Sjb (void *)((uintptr_t)SYSTRACE_ENTRY(i))); 253219561Savg (void) dtrace_probe_create(systrace_id, MODNAME, uglyhack.pp_syscallnames[i], 254179237Sjb "return", SYSTRACE_ARTIFICIAL_FRAMES, 255179237Sjb (void *)((uintptr_t)SYSTRACE_RETURN(i))); 256179237Sjb } 257179237Sjb} 258179237Sjb 259179237Sjbstatic void 260179237Sjbsystrace_destroy(void *arg, dtrace_id_t id, void *parg) 261179237Sjb{ 262179237Sjb#ifdef DEBUG 263179237Sjb int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 264179237Sjb 265179237Sjb /* 266179237Sjb * There's nothing to do here but assert that we have actually been 267179237Sjb * disabled. 268179237Sjb */ 269179237Sjb if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 270179237Sjb ASSERT(sysent[sysnum].sy_entry == 0); 271179237Sjb } else { 272179237Sjb ASSERT(sysent[sysnum].sy_return == 0); 273179237Sjb } 274179237Sjb#endif 275179237Sjb} 276179237Sjb 277179237Sjbstatic void 278179237Sjbsystrace_enable(void *arg, dtrace_id_t id, void *parg) 279179237Sjb{ 280179237Sjb int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 281179237Sjb 282184698Srodrigc if (SYSENT[sysnum].sy_systrace_args_func == NULL) 283184698Srodrigc SYSENT[sysnum].sy_systrace_args_func = systrace_args; 284184698Srodrigc 285179237Sjb if (SYSTRACE_ISENTRY((uintptr_t)parg)) 286184698Srodrigc SYSENT[sysnum].sy_entry = id; 287179237Sjb else 288184698Srodrigc SYSENT[sysnum].sy_return = id; 289179237Sjb} 290179237Sjb 291179237Sjbstatic void 292179237Sjbsystrace_disable(void *arg, dtrace_id_t id, void *parg) 293179237Sjb{ 294179237Sjb int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 295179237Sjb 296184698Srodrigc SYSENT[sysnum].sy_entry = 0; 297184698Srodrigc SYSENT[sysnum].sy_return = 0; 298179237Sjb} 299179237Sjb 300179237Sjbstatic void 301179237Sjbsystrace_load(void *dummy) 302179237Sjb{ 303179237Sjb /* Create the /dev/dtrace/systrace entry. */ 304179237Sjb systrace_cdev = make_dev(&systrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 305184698Srodrigc DEVNAME); 306179237Sjb 307184698Srodrigc if (dtrace_register(PROVNAME, &systrace_attr, DTRACE_PRIV_USER, 308179237Sjb NULL, &systrace_pops, NULL, &systrace_id) != 0) 309179237Sjb return; 310179237Sjb 311184698Srodrigc#if !defined(LINUX_SYSTRACE) 312179237Sjb systrace_probe_func = systrace_probe; 313184698Srodrigc#endif 314179237Sjb} 315179237Sjb 316179237Sjb 317179237Sjbstatic int 318179237Sjbsystrace_unload() 319179237Sjb{ 320179237Sjb int error = 0; 321179237Sjb 322179237Sjb if ((error = dtrace_unregister(systrace_id)) != 0) 323179237Sjb return (error); 324179237Sjb 325184698Srodrigc#if !defined(LINUX_SYSTRACE) 326179237Sjb systrace_probe_func = NULL; 327184698Srodrigc#endif 328179237Sjb 329179237Sjb destroy_dev(systrace_cdev); 330179237Sjb 331179237Sjb return (error); 332179237Sjb} 333179237Sjb 334179237Sjbstatic int 335179237Sjbsystrace_modevent(module_t mod __unused, int type, void *data __unused) 336179237Sjb{ 337179237Sjb int error = 0; 338179237Sjb 339179237Sjb switch (type) { 340179237Sjb case MOD_LOAD: 341179237Sjb break; 342179237Sjb 343179237Sjb case MOD_UNLOAD: 344179237Sjb break; 345179237Sjb 346179237Sjb case MOD_SHUTDOWN: 347179237Sjb break; 348179237Sjb 349179237Sjb default: 350179237Sjb error = EOPNOTSUPP; 351179237Sjb break; 352179237Sjb 353179237Sjb } 354179237Sjb return (error); 355179237Sjb} 356179237Sjb 357179237Sjbstatic int 358179237Sjbsystrace_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) 359179237Sjb{ 360179237Sjb return (0); 361179237Sjb} 362179237Sjb 363179237SjbSYSINIT(systrace_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, systrace_load, NULL); 364179237SjbSYSUNINIT(systrace_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, systrace_unload, NULL); 365179237Sjb 366184698Srodrigc#ifdef LINUX_SYSTRACE 367219561SavgDEV_MODULE(systrace_linux32, systrace_modevent, NULL); 368219561SavgMODULE_VERSION(systrace_linux32, 1); 369219561SavgMODULE_DEPEND(systrace_linux32, linux, 1, 1, 1); 370219561SavgMODULE_DEPEND(systrace_linux32, dtrace, 1, 1, 1); 371219561SavgMODULE_DEPEND(systrace_linux32, opensolaris, 1, 1, 1); 372219561Savg#elif defined(FREEBSD32_SYSTRACE) 373219561SavgDEV_MODULE(systrace_freebsd32, systrace_modevent, NULL); 374219561SavgMODULE_VERSION(systrace_freebsd32, 1); 375219561SavgMODULE_DEPEND(systrace_freebsd32, dtrace, 1, 1, 1); 376219561SavgMODULE_DEPEND(systrace_freebsd32, opensolaris, 1, 1, 1); 377184698Srodrigc#else 378179237SjbDEV_MODULE(systrace, systrace_modevent, NULL); 379179237SjbMODULE_VERSION(systrace, 1); 380179237SjbMODULE_DEPEND(systrace, dtrace, 1, 1, 1); 381179237SjbMODULE_DEPEND(systrace, opensolaris, 1, 1, 1); 382184698Srodrigc#endif 383