1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */ 27 28#if !defined(__APPLE__) 29#include <sys/dtrace.h> 30#include <sys/systrace.h> 31#include <sys/stat.h> 32#include <sys/systm.h> 33#include <sys/conf.h> 34#include <sys/ddi.h> 35#include <sys/sunddi.h> 36#include <sys/atomic.h> 37#define SYSTRACE_ARTIFICIAL_FRAMES 1 38#else 39 40#ifdef KERNEL 41#ifndef _KERNEL 42#define _KERNEL /* Solaris vs. Darwin */ 43#endif 44#endif 45 46#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */ 47#include <kern/thread.h> 48#include <mach/thread_status.h> 49/* XXX All of these should really be derived from syscall_sw.h */ 50#if defined(__i386__) || defined (__x86_64__) 51#define SYSCALL_CLASS_SHIFT 24 52#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT) 53#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK) 54#define I386_SYSCALL_NUMBER_MASK (0xFFFF) 55 56typedef x86_saved_state_t savearea_t; 57#endif 58 59#include <sys/param.h> 60#include <sys/systm.h> 61#include <sys/proc.h> 62#include <sys/errno.h> 63#include <sys/ioctl.h> 64#include <sys/conf.h> 65#include <sys/fcntl.h> 66#include <miscfs/devfs/devfs.h> 67 68#include <sys/dtrace.h> 69#include <sys/dtrace_impl.h> 70#include "systrace.h" 71#include <sys/stat.h> 72#include <sys/systm.h> 73#include <sys/conf.h> 74#include <sys/user.h> 75 76#include <machine/pal_routines.h> 77 78#if defined(__i386__) || defined (__x86_64__) 79#define SYSTRACE_ARTIFICIAL_FRAMES 2 80#define MACHTRACE_ARTIFICIAL_FRAMES 3 81#else 82#error Unknown Architecture 83#endif 84 85#include <sys/sysent.h> 86#define sy_callc sy_call /* Map Solaris slot name to Darwin's */ 87#define NSYSCALL nsysent /* and is less than 500 or so */ 88 89extern const char *syscallnames[]; 90 91#include <sys/dtrace_glue.h> 92#define casptr dtrace_casptr 93#define membar_enter dtrace_membar_producer 94 95#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */ 96#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */ 97 98extern lck_attr_t* dtrace_lck_attr; 99extern lck_grp_t* dtrace_lck_grp; 100static lck_mtx_t dtrace_systrace_lock; /* probe state lock */ 101 102systrace_sysent_t *systrace_sysent = NULL; 103void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); 104 105static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int); 106 107void 108systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1, 109 uint64_t arg2, uint64_t arg3, uint64_t arg4) 110{ 111#pragma unused(id,arg0,arg1,arg2,arg3,arg4) 112} 113 114int32_t 115dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) 116{ 117 boolean_t flavor; 118 unsigned short code; 119 120 systrace_sysent_t *sy; 121 dtrace_id_t id; 122 int32_t rval; 123#if 0 /* XXX */ 124 proc_t *p; 125#endif 126 syscall_arg_t *ip = (syscall_arg_t *)uap; 127 128#if defined(__i386__) || defined (__x86_64__) 129#pragma unused(flavor) 130 { 131 pal_register_cache_state(current_thread(), VALID); 132 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); 133 134 if (is_saved_state64(tagged_regs)) { 135 x86_saved_state64_t *regs = saved_state64(tagged_regs); 136 code = regs->rax & SYSCALL_NUMBER_MASK; 137 /* 138 * Check for indirect system call... system call number 139 * passed as 'arg0' 140 */ 141 if (code == 0) { 142 code = regs->rdi; 143 } 144 } else { 145 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK; 146 147 if (code == 0) { 148 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int)); 149 code = fuword(params); 150 } 151 } 152 } 153#else 154#error Unknown Architecture 155#endif 156 157 // Bounds "check" the value of code a la unix_syscall 158 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code]; 159 160 if ((id = sy->stsy_entry) != DTRACE_IDNONE) { 161 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 162 if (uthread) 163 uthread->t_dtrace_syscall_args = (void *)ip; 164 165 if (ip) 166 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); 167 else 168 (*systrace_probe)(id, 0, 0, 0, 0, 0); 169 170 if (uthread) 171 uthread->t_dtrace_syscall_args = (void *)0; 172 } 173 174#if 0 /* XXX */ 175 /* 176 * We want to explicitly allow DTrace consumers to stop a process 177 * before it actually executes the meat of the syscall. 178 */ 179 p = ttoproc(curthread); 180 mutex_enter(&p->p_lock); 181 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { 182 curthread->t_dtrace_stop = 0; 183 stop(PR_REQUESTED, 0); 184 } 185 mutex_exit(&p->p_lock); 186#endif 187 188 rval = (*sy->stsy_underlying)(pp, uap, rv); 189 190 if ((id = sy->stsy_return) != DTRACE_IDNONE) { 191 uint64_t munged_rv0, munged_rv1; 192 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 193 194 if (uthread) 195 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ 196 197 /* 198 * "Decode" rv for use in the call to dtrace_probe() 199 */ 200 if (rval == ERESTART) { 201 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ 202 munged_rv1 = -1LL; 203 } else if (rval != EJUSTRETURN) { 204 if (rval) { 205 munged_rv0 = -1LL; /* Mimic what libc will do. */ 206 munged_rv1 = -1LL; 207 } else { 208 switch (sy->stsy_return_type) { 209 case _SYSCALL_RET_INT_T: 210 munged_rv0 = rv[0]; 211 munged_rv1 = rv[1]; 212 break; 213 case _SYSCALL_RET_UINT_T: 214 munged_rv0 = ((u_int)rv[0]); 215 munged_rv1 = ((u_int)rv[1]); 216 break; 217 case _SYSCALL_RET_OFF_T: 218 case _SYSCALL_RET_UINT64_T: 219 munged_rv0 = *(u_int64_t *)rv; 220 munged_rv1 = 0LL; 221 break; 222 case _SYSCALL_RET_ADDR_T: 223 case _SYSCALL_RET_SIZE_T: 224 case _SYSCALL_RET_SSIZE_T: 225 munged_rv0 = *(user_addr_t *)rv; 226 munged_rv1 = 0LL; 227 break; 228 case _SYSCALL_RET_NONE: 229 munged_rv0 = 0LL; 230 munged_rv1 = 0LL; 231 break; 232 default: 233 munged_rv0 = 0LL; 234 munged_rv1 = 0LL; 235 break; 236 } 237 } 238 } else { 239 munged_rv0 = 0LL; 240 munged_rv1 = 0LL; 241 } 242 243 /* 244 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says: 245 * 246 * "This is a bit of an historical artifact. At first, the syscall provider just 247 * had its return value in arg0, and the fbt and pid providers had their return 248 * values in arg1 (so that we could use arg0 for the offset of the return site). 249 * 250 * We inevitably started writing scripts where we wanted to see the return 251 * values from probes in all three providers, and we made this script easier 252 * to write by replicating the syscall return values in arg1 to match fbt and 253 * pid. We debated briefly about removing the return value from arg0, but 254 * decided that it would be less confusing to have the same data in two places 255 * than to have some non-helpful, non-intuitive value in arg0. 256 * 257 * This change was made 4/23/2003 according to the DTrace project's putback log." 258 */ 259 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); 260 } 261 262 return (rval); 263} 264 265void 266dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) 267{ 268 systrace_sysent_t *sy; 269 dtrace_id_t id; 270 271 // Bounds "check" the value of code a la unix_syscall_return 272 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code]; 273 274 if ((id = sy->stsy_return) != DTRACE_IDNONE) { 275 uint64_t munged_rv0, munged_rv1; 276 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 277 278 if (uthread) 279 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ 280 281 /* 282 * "Decode" rv for use in the call to dtrace_probe() 283 */ 284 if (rval == ERESTART) { 285 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ 286 munged_rv1 = -1LL; 287 } else if (rval != EJUSTRETURN) { 288 if (rval) { 289 munged_rv0 = -1LL; /* Mimic what libc will do. */ 290 munged_rv1 = -1LL; 291 } else { 292 switch (sy->stsy_return_type) { 293 case _SYSCALL_RET_INT_T: 294 munged_rv0 = rv[0]; 295 munged_rv1 = rv[1]; 296 break; 297 case _SYSCALL_RET_UINT_T: 298 munged_rv0 = ((u_int)rv[0]); 299 munged_rv1 = ((u_int)rv[1]); 300 break; 301 case _SYSCALL_RET_OFF_T: 302 case _SYSCALL_RET_UINT64_T: 303 munged_rv0 = *(u_int64_t *)rv; 304 munged_rv1 = 0LL; 305 break; 306 case _SYSCALL_RET_ADDR_T: 307 case _SYSCALL_RET_SIZE_T: 308 case _SYSCALL_RET_SSIZE_T: 309 munged_rv0 = *(user_addr_t *)rv; 310 munged_rv1 = 0LL; 311 break; 312 case _SYSCALL_RET_NONE: 313 munged_rv0 = 0LL; 314 munged_rv1 = 0LL; 315 break; 316 default: 317 munged_rv0 = 0LL; 318 munged_rv1 = 0LL; 319 break; 320 } 321 } 322 } else { 323 munged_rv0 = 0LL; 324 munged_rv1 = 0LL; 325 } 326 327 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); 328 } 329} 330#endif /* __APPLE__ */ 331 332#define SYSTRACE_SHIFT 16 333#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) 334#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1)) 335#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id)) 336#define SYSTRACE_RETURN(id) (id) 337 338#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL) 339#error 1 << SYSTRACE_SHIFT must exceed number of system calls 340#endif 341 342static dev_info_t *systrace_devi; 343static dtrace_provider_id_t systrace_id; 344 345#if !defined (__APPLE__) 346static void 347systrace_init(struct sysent *actual, systrace_sysent_t **interposed) 348{ 349 systrace_sysent_t *sysent = *interposed; 350 int i; 351 352 if (sysent == NULL) { 353 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) * 354 NSYSCALL, KM_SLEEP); 355 } 356 357 for (i = 0; i < NSYSCALL; i++) { 358 struct sysent *a = &actual[i]; 359 systrace_sysent_t *s = &sysent[i]; 360 361 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) 362 continue; 363 364 if (a->sy_callc == dtrace_systrace_syscall) 365 continue; 366 367#ifdef _SYSCALL32_IMPL 368 if (a->sy_callc == dtrace_systrace_syscall32) 369 continue; 370#endif 371 372 s->stsy_underlying = a->sy_callc; 373 } 374} 375#else 376#define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */ 377static void 378systrace_init(struct sysent *actual, systrace_sysent_t **interposed) 379{ 380 381 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning 382 from bsd/sys/sysent.h */ 383 int i; 384 385 if (ssysent == NULL) { 386 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) * 387 NSYSCALL, KM_SLEEP); 388 } 389 390 for (i = 0; i < NSYSCALL; i++) { 391 struct sysent *a = &actual[i]; 392 systrace_sysent_t *s = &ssysent[i]; 393 394 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) 395 continue; 396 397 if (a->sy_callc == dtrace_systrace_syscall) 398 continue; 399 400#ifdef _SYSCALL32_IMPL 401 if (a->sy_callc == dtrace_systrace_syscall32) 402 continue; 403#endif 404 405 s->stsy_underlying = a->sy_callc; 406 s->stsy_return_type = a->sy_return_type; 407 } 408 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr); 409} 410 411#endif /* __APPLE__ */ 412 413/*ARGSUSED*/ 414static void 415systrace_provide(void *arg, const dtrace_probedesc_t *desc) 416{ 417#pragma unused(arg) /* __APPLE__ */ 418 int i; 419 420 if (desc != NULL) 421 return; 422 423 systrace_init(sysent, &systrace_sysent); 424#ifdef _SYSCALL32_IMPL 425 systrace_init(sysent32, &systrace_sysent32); 426#endif 427 428 for (i = 0; i < NSYSCALL; i++) { 429 if (systrace_sysent[i].stsy_underlying == NULL) 430 continue; 431 432 if (dtrace_probe_lookup(systrace_id, NULL, 433 syscallnames[i], "entry") != 0) 434 continue; 435 436 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], 437 "entry", SYSTRACE_ARTIFICIAL_FRAMES, 438 (void *)((uintptr_t)SYSTRACE_ENTRY(i))); 439 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], 440 "return", SYSTRACE_ARTIFICIAL_FRAMES, 441 (void *)((uintptr_t)SYSTRACE_RETURN(i))); 442 443 systrace_sysent[i].stsy_entry = DTRACE_IDNONE; 444 systrace_sysent[i].stsy_return = DTRACE_IDNONE; 445#ifdef _SYSCALL32_IMPL 446 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE; 447 systrace_sysent32[i].stsy_return = DTRACE_IDNONE; 448#endif 449 } 450} 451#if defined(__APPLE__) 452#undef systrace_init 453#endif 454 455/*ARGSUSED*/ 456static void 457systrace_destroy(void *arg, dtrace_id_t id, void *parg) 458{ 459#pragma unused(arg,id) /* __APPLE__ */ 460 461 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 462 463#pragma unused(sysnum) /* __APPLE__ */ 464 /* 465 * There's nothing to do here but assert that we have actually been 466 * disabled. 467 */ 468 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 469 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); 470#ifdef _SYSCALL32_IMPL 471 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE); 472#endif 473 } else { 474 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 475#ifdef _SYSCALL32_IMPL 476 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE); 477#endif 478 } 479} 480 481/*ARGSUSED*/ 482static int 483systrace_enable(void *arg, dtrace_id_t id, void *parg) 484{ 485#pragma unused(arg) /* __APPLE__ */ 486 487 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 488 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || 489 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); 490 491 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 492 systrace_sysent[sysnum].stsy_entry = id; 493#ifdef _SYSCALL32_IMPL 494 systrace_sysent32[sysnum].stsy_entry = id; 495#endif 496 } else { 497 systrace_sysent[sysnum].stsy_return = id; 498#ifdef _SYSCALL32_IMPL 499 systrace_sysent32[sysnum].stsy_return = id; 500#endif 501 } 502 503 if (enabled) { 504 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall); 505 return(0); 506 } 507#ifdef _SYSCALL32_IMPL 508 (void) casptr(&sysent32[sysnum].sy_callc, 509 (void *)systrace_sysent32[sysnum].stsy_underlying, 510 (void *)dtrace_systrace_syscall32); 511#endif 512 513 lck_mtx_lock(&dtrace_systrace_lock); 514 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) { 515 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall; 516 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t)); 517 } 518 lck_mtx_unlock(&dtrace_systrace_lock); 519 return (0); 520} 521 522/*ARGSUSED*/ 523static void 524systrace_disable(void *arg, dtrace_id_t id, void *parg) 525{ 526#pragma unused(arg,id) /* __APPLE__ */ 527 528 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 529 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || 530 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 531 532 if (disable) { 533 lck_mtx_lock(&dtrace_systrace_lock); 534 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) 535 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying)); 536 lck_mtx_unlock(&dtrace_systrace_lock); 537 538#ifdef _SYSCALL32_IMPL 539 (void) casptr(&sysent32[sysnum].sy_callc, 540 (void *)dtrace_systrace_syscall32, 541 (void *)systrace_sysent32[sysnum].stsy_underlying); 542#endif 543 } 544 545 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 546 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; 547#ifdef _SYSCALL32_IMPL 548 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE; 549#endif 550 } else { 551 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; 552#ifdef _SYSCALL32_IMPL 553 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE; 554#endif 555 } 556} 557 558static dtrace_pattr_t systrace_attr = { 559{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 560{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 561{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 562{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 563{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 564}; 565 566static dtrace_pops_t systrace_pops = { 567 systrace_provide, 568 NULL, 569 systrace_enable, 570 systrace_disable, 571 NULL, 572 NULL, 573 NULL, 574 systrace_getarg, 575 NULL, 576 systrace_destroy 577}; 578 579static int 580systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 581{ 582 switch (cmd) { 583 case DDI_ATTACH: 584 break; 585 case DDI_RESUME: 586 return (DDI_SUCCESS); 587 default: 588 return (DDI_FAILURE); 589 } 590 591#if !defined(__APPLE__) 592 systrace_probe = (void (*)())dtrace_probe; 593 membar_enter(); 594 595 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, 596 DDI_PSEUDO, NULL) == DDI_FAILURE || 597 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, 598 &systrace_pops, NULL, &systrace_id) != 0) { 599 systrace_probe = systrace_stub; 600 ddi_remove_minor_node(devi, NULL); 601 return (DDI_FAILURE); 602 } 603#else 604 systrace_probe = (void(*))&dtrace_probe; 605 membar_enter(); 606 607 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, 608 DDI_PSEUDO, 0) == DDI_FAILURE || 609 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, 610 &systrace_pops, NULL, &systrace_id) != 0) { 611 systrace_probe = systrace_stub; 612 ddi_remove_minor_node(devi, NULL); 613 return (DDI_FAILURE); 614 } 615#endif /* __APPLE__ */ 616 617 ddi_report_dev(devi); 618 systrace_devi = devi; 619 620 return (DDI_SUCCESS); 621} 622 623#if !defined(__APPLE__) 624static int 625systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 626{ 627 switch (cmd) { 628 case DDI_DETACH: 629 break; 630 case DDI_SUSPEND: 631 return (DDI_SUCCESS); 632 default: 633 return (DDI_FAILURE); 634 } 635 636 if (dtrace_unregister(systrace_id) != 0) 637 return (DDI_FAILURE); 638 639 ddi_remove_minor_node(devi, NULL); 640 systrace_probe = systrace_stub; 641 return (DDI_SUCCESS); 642} 643 644/*ARGSUSED*/ 645static int 646systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 647{ 648 int error; 649 650 switch (infocmd) { 651 case DDI_INFO_DEVT2DEVINFO: 652 *result = (void *)systrace_devi; 653 error = DDI_SUCCESS; 654 break; 655 case DDI_INFO_DEVT2INSTANCE: 656 *result = (void *)0; 657 error = DDI_SUCCESS; 658 break; 659 default: 660 error = DDI_FAILURE; 661 } 662 return (error); 663} 664 665/*ARGSUSED*/ 666static int 667systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 668{ 669 return (0); 670} 671 672static struct cb_ops systrace_cb_ops = { 673 systrace_open, /* open */ 674 nodev, /* close */ 675 nulldev, /* strategy */ 676 nulldev, /* print */ 677 nodev, /* dump */ 678 nodev, /* read */ 679 nodev, /* write */ 680 nodev, /* ioctl */ 681 nodev, /* devmap */ 682 nodev, /* mmap */ 683 nodev, /* segmap */ 684 nochpoll, /* poll */ 685 ddi_prop_op, /* cb_prop_op */ 686 0, /* streamtab */ 687 D_NEW | D_MP /* Driver compatibility flag */ 688}; 689 690static struct dev_ops systrace_ops = { 691 DEVO_REV, /* devo_rev, */ 692 0, /* refcnt */ 693 systrace_info, /* get_dev_info */ 694 nulldev, /* identify */ 695 nulldev, /* probe */ 696 systrace_attach, /* attach */ 697 systrace_detach, /* detach */ 698 nodev, /* reset */ 699 &systrace_cb_ops, /* driver operations */ 700 NULL, /* bus operations */ 701 nodev /* dev power */ 702}; 703 704/* 705 * Module linkage information for the kernel. 706 */ 707static struct modldrv modldrv = { 708 &mod_driverops, /* module type (this is a pseudo driver) */ 709 "System Call Tracing", /* name of module */ 710 &systrace_ops, /* driver ops */ 711}; 712 713static struct modlinkage modlinkage = { 714 MODREV_1, 715 (void *)&modldrv, 716 NULL 717}; 718 719int 720_init(void) 721{ 722 return (mod_install(&modlinkage)); 723} 724 725int 726_info(struct modinfo *modinfop) 727{ 728 return (mod_info(&modlinkage, modinfop)); 729} 730 731int 732_fini(void) 733{ 734 return (mod_remove(&modlinkage)); 735} 736#else 737typedef kern_return_t (*mach_call_t)(void *); 738 739/* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */ 740typedef void mach_munge_t(const void *, void *); 741 742typedef struct { 743 int mach_trap_arg_count; 744 kern_return_t (*mach_trap_function)(void *); 745#if 0 /* no active architectures use mungers for mach traps */ 746 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */ 747 mach_munge_t *mach_trap_arg_munge64; /* system call arguments for 64-bit */ 748#endif 749#if MACH_ASSERT 750 const char* mach_trap_name; 751#endif /* MACH_ASSERT */ 752} mach_trap_t; 753 754extern mach_trap_t mach_trap_table[]; 755extern int mach_trap_count; 756 757extern const char *mach_syscall_name_table[]; 758 759/* XXX From osfmk/i386/bsd_i386.c */ 760struct mach_call_args { 761 syscall_arg_t arg1; 762 syscall_arg_t arg2; 763 syscall_arg_t arg3; 764 syscall_arg_t arg4; 765 syscall_arg_t arg5; 766 syscall_arg_t arg6; 767 syscall_arg_t arg7; 768 syscall_arg_t arg8; 769 syscall_arg_t arg9; 770}; 771 772#undef NSYSCALL 773#define NSYSCALL mach_trap_count 774 775#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL) 776#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps 777#endif 778 779typedef struct machtrace_sysent { 780 dtrace_id_t stsy_entry; 781 dtrace_id_t stsy_return; 782 kern_return_t (*stsy_underlying)(void *); 783 int32_t stsy_return_type; 784} machtrace_sysent_t; 785 786static machtrace_sysent_t *machtrace_sysent = NULL; 787 788void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t, 789 uint64_t, uint64_t, uint64_t); 790 791static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int); 792 793static dev_info_t *machtrace_devi; 794static dtrace_provider_id_t machtrace_id; 795 796static kern_return_t 797dtrace_machtrace_syscall(struct mach_call_args *args) 798{ 799 boolean_t flavor; 800 unsigned short code; 801 802 machtrace_sysent_t *sy; 803 dtrace_id_t id; 804 kern_return_t rval; 805#if 0 /* XXX */ 806 proc_t *p; 807#endif 808 syscall_arg_t *ip = (syscall_arg_t *)args; 809 mach_call_t mach_call; 810 811#if defined(__i386__) || defined (__x86_64__) 812#pragma unused(flavor) 813 { 814 pal_register_cache_state(current_thread(), VALID); 815 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); 816 817 if (is_saved_state64(tagged_regs)) { 818 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK; 819 } else { 820 code = -saved_state32(tagged_regs)->eax; 821 } 822 } 823#else 824#error Unknown Architecture 825#endif 826 827 sy = &machtrace_sysent[code]; 828 829 if ((id = sy->stsy_entry) != DTRACE_IDNONE) { 830 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 831 832 if (uthread) 833 uthread->t_dtrace_syscall_args = (void *)ip; 834 835 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); 836 837 if (uthread) 838 uthread->t_dtrace_syscall_args = (void *)0; 839 } 840 841#if 0 /* XXX */ 842 /* 843 * We want to explicitly allow DTrace consumers to stop a process 844 * before it actually executes the meat of the syscall. 845 */ 846 p = ttoproc(curthread); 847 mutex_enter(&p->p_lock); 848 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { 849 curthread->t_dtrace_stop = 0; 850 stop(PR_REQUESTED, 0); 851 } 852 mutex_exit(&p->p_lock); 853#endif 854 855 mach_call = (mach_call_t)(*sy->stsy_underlying); 856 rval = mach_call(args); 857 858 if ((id = sy->stsy_return) != DTRACE_IDNONE) 859 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0); 860 861 return (rval); 862} 863 864static void 865machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed) 866{ 867 machtrace_sysent_t *msysent = *interposed; 868 int i; 869 870 if (msysent == NULL) { 871 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) * 872 NSYSCALL, KM_SLEEP); 873 } 874 875 for (i = 0; i < NSYSCALL; i++) { 876 mach_trap_t *a = &actual[i]; 877 machtrace_sysent_t *s = &msysent[i]; 878 879 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) 880 continue; 881 882 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) 883 continue; 884 885 s->stsy_underlying = a->mach_trap_function; 886 } 887} 888 889/*ARGSUSED*/ 890static void 891machtrace_provide(void *arg, const dtrace_probedesc_t *desc) 892{ 893#pragma unused(arg) /* __APPLE__ */ 894 895 int i; 896 897 if (desc != NULL) 898 return; 899 900 machtrace_init(mach_trap_table, &machtrace_sysent); 901 902 for (i = 0; i < NSYSCALL; i++) { 903 904 if (machtrace_sysent[i].stsy_underlying == NULL) 905 continue; 906 907 if (dtrace_probe_lookup(machtrace_id, NULL, 908 mach_syscall_name_table[i], "entry") != 0) 909 continue; 910 911 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], 912 "entry", MACHTRACE_ARTIFICIAL_FRAMES, 913 (void *)((uintptr_t)SYSTRACE_ENTRY(i))); 914 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], 915 "return", MACHTRACE_ARTIFICIAL_FRAMES, 916 (void *)((uintptr_t)SYSTRACE_RETURN(i))); 917 918 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE; 919 machtrace_sysent[i].stsy_return = DTRACE_IDNONE; 920 } 921} 922 923/*ARGSUSED*/ 924static void 925machtrace_destroy(void *arg, dtrace_id_t id, void *parg) 926{ 927#pragma unused(arg,id) /* __APPLE__ */ 928 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 929 930#pragma unused(sysnum) /* __APPLE__ */ 931 932 /* 933 * There's nothing to do here but assert that we have actually been 934 * disabled. 935 */ 936 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 937 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); 938 } else { 939 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 940 } 941} 942 943/*ARGSUSED*/ 944static int 945machtrace_enable(void *arg, dtrace_id_t id, void *parg) 946{ 947#pragma unused(arg) /* __APPLE__ */ 948 949 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 950 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || 951 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); 952 953 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 954 machtrace_sysent[sysnum].stsy_entry = id; 955 } else { 956 machtrace_sysent[sysnum].stsy_return = id; 957 } 958 959 if (enabled) { 960 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall); 961 return(0); 962 } 963 964 lck_mtx_lock(&dtrace_systrace_lock); 965 966 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) { 967 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall; 968 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t)); 969 } 970 971 lck_mtx_unlock(&dtrace_systrace_lock); 972 973 return(0); 974} 975 976/*ARGSUSED*/ 977static void 978machtrace_disable(void *arg, dtrace_id_t id, void *parg) 979{ 980#pragma unused(arg,id) /* __APPLE__ */ 981 982 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 983 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || 984 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 985 986 if (disable) { 987 988 lck_mtx_lock(&dtrace_systrace_lock); 989 990 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) { 991 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t)); 992 } 993 lck_mtx_unlock(&dtrace_systrace_lock); 994 } 995 996 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 997 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; 998 } else { 999 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; 1000 } 1001} 1002 1003static dtrace_pattr_t machtrace_attr = { 1004{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 1005{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 1006{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 1007{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 1008{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 1009}; 1010 1011static dtrace_pops_t machtrace_pops = { 1012 machtrace_provide, 1013 NULL, 1014 machtrace_enable, 1015 machtrace_disable, 1016 NULL, 1017 NULL, 1018 NULL, 1019 machtrace_getarg, 1020 NULL, 1021 machtrace_destroy 1022}; 1023 1024static int 1025machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 1026{ 1027 switch (cmd) { 1028 case DDI_ATTACH: 1029 break; 1030 case DDI_RESUME: 1031 return (DDI_SUCCESS); 1032 default: 1033 return (DDI_FAILURE); 1034 } 1035 1036#if !defined(__APPLE__) 1037 machtrace_probe = (void (*)())dtrace_probe; 1038 membar_enter(); 1039 1040 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, 1041 DDI_PSEUDO, NULL) == DDI_FAILURE || 1042 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, 1043 &machtrace_pops, NULL, &machtrace_id) != 0) { 1044 machtrace_probe = systrace_stub; 1045#else 1046 machtrace_probe = dtrace_probe; 1047 membar_enter(); 1048 1049 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, 1050 DDI_PSEUDO, 0) == DDI_FAILURE || 1051 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, 1052 &machtrace_pops, NULL, &machtrace_id) != 0) { 1053 machtrace_probe = (void (*))&systrace_stub; 1054#endif /* __APPLE__ */ 1055 ddi_remove_minor_node(devi, NULL); 1056 return (DDI_FAILURE); 1057 } 1058 1059 ddi_report_dev(devi); 1060 machtrace_devi = devi; 1061 1062 return (DDI_SUCCESS); 1063} 1064 1065d_open_t _systrace_open; 1066 1067int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p) 1068{ 1069#pragma unused(dev,flags,devtype,p) 1070 return 0; 1071} 1072 1073#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */ 1074 1075/* 1076 * A struct describing which functions will get invoked for certain 1077 * actions. 1078 */ 1079static struct cdevsw systrace_cdevsw = 1080{ 1081 _systrace_open, /* open */ 1082 eno_opcl, /* close */ 1083 eno_rdwrt, /* read */ 1084 eno_rdwrt, /* write */ 1085 eno_ioctl, /* ioctl */ 1086 (stop_fcn_t *)nulldev, /* stop */ 1087 (reset_fcn_t *)nulldev, /* reset */ 1088 NULL, /* tty's */ 1089 eno_select, /* select */ 1090 eno_mmap, /* mmap */ 1091 eno_strat, /* strategy */ 1092 eno_getc, /* getc */ 1093 eno_putc, /* putc */ 1094 0 /* type */ 1095}; 1096 1097static int gSysTraceInited = 0; 1098 1099void systrace_init( void ); 1100 1101void systrace_init( void ) 1102{ 1103 if (0 == gSysTraceInited) { 1104 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw); 1105 1106 if (majdevno < 0) { 1107 printf("systrace_init: failed to allocate a major number!\n"); 1108 gSysTraceInited = 0; 1109 return; 1110 } 1111 1112 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); 1113 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); 1114 1115 gSysTraceInited = 1; 1116 } else 1117 panic("systrace_init: called twice!\n"); 1118} 1119#undef SYSTRACE_MAJOR 1120#endif /* __APPLE__ */ 1121 1122static uint64_t 1123systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) 1124{ 1125#pragma unused(arg,id,parg,aframes) /* __APPLE__ */ 1126 uint64_t val = 0; 1127 syscall_arg_t *stack = (syscall_arg_t *)NULL; 1128 1129 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 1130 1131 if (uthread) 1132 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args; 1133 1134 if (!stack) 1135 return(0); 1136 1137 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 1138 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */ 1139 val = (uint64_t)*(stack+argno); 1140 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 1141 return (val); 1142} 1143 1144 1145static uint64_t 1146machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) 1147{ 1148#pragma unused(arg,id,parg,aframes) /* __APPLE__ */ 1149 uint64_t val = 0; 1150 syscall_arg_t *stack = (syscall_arg_t *)NULL; 1151 1152 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 1153 1154 if (uthread) 1155 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args; 1156 1157 if (!stack) 1158 return(0); 1159 1160 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 1161 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */ 1162 val = (uint64_t)*(stack+argno); 1163 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 1164 return (val); 1165} 1166 1167