1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */ 27 28#if !defined(__APPLE__) 29#include <sys/dtrace.h> 30#include <sys/systrace.h> 31#include <sys/stat.h> 32#include <sys/systm.h> 33#include <sys/conf.h> 34#include <sys/ddi.h> 35#include <sys/sunddi.h> 36#include <sys/atomic.h> 37#define SYSTRACE_ARTIFICIAL_FRAMES 1 38#else 39 40#ifdef KERNEL 41#ifndef _KERNEL 42#define _KERNEL /* Solaris vs. Darwin */ 43#endif 44#endif 45 46#include <kern/thread.h> 47#include <mach/thread_status.h> 48 49/* XXX All of these should really be derived from syscall_sw.h */ 50#if defined (__x86_64__) 51#define SYSCALL_CLASS_SHIFT 24 52#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT) 53#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK) 54#define I386_SYSCALL_NUMBER_MASK (0xFFFF) 55#endif 56 57#include <sys/param.h> 58#include <sys/systm.h> 59#include <sys/proc.h> 60#include <sys/errno.h> 61#include <sys/ioctl.h> 62#include <sys/conf.h> 63#include <sys/fcntl.h> 64#include <miscfs/devfs/devfs.h> 65 66#include <sys/dtrace.h> 67#include <sys/dtrace_impl.h> 68#include "systrace.h" 69#include <sys/stat.h> 70#include <sys/systm.h> 71#include <sys/conf.h> 72#include <sys/user.h> 73 74#include <machine/pal_routines.h> 75 76#if defined (__x86_64__) 77#define SYSTRACE_ARTIFICIAL_FRAMES 2 78#define MACHTRACE_ARTIFICIAL_FRAMES 3 79#else 80#error Unknown Architecture 81#endif 82 83#include <sys/sysent.h> 84#define sy_callc sy_call /* Map Solaris slot name to Darwin's */ 85#define NSYSCALL nsysent /* and is less than 500 or so */ 86 87extern const char *syscallnames[]; 88 89#include <sys/dtrace_glue.h> 90#define casptr dtrace_casptr 91#define membar_enter dtrace_membar_producer 92 93#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */ 94#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */ 95 96extern lck_attr_t* dtrace_lck_attr; 97extern lck_grp_t* dtrace_lck_grp; 98static lck_mtx_t dtrace_systrace_lock; /* probe state lock */ 99 100systrace_sysent_t *systrace_sysent = NULL; 101void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); 102 103static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int); 104 105void 106systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1, 107 uint64_t arg2, uint64_t arg3, uint64_t arg4) 108{ 109#pragma unused(id,arg0,arg1,arg2,arg3,arg4) 110} 111 112int32_t 113dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) 114{ 115 unsigned short code; /* The system call number */ 116 117 systrace_sysent_t *sy; 118 dtrace_id_t id; 119 int32_t rval; 120#if 0 /* XXX */ 121 proc_t *p; 122#endif 123 syscall_arg_t *ip = (syscall_arg_t *)uap; 124 125#if defined (__x86_64__) 126 { 127 pal_register_cache_state(current_thread(), VALID); 128 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); 129 130 if (is_saved_state64(tagged_regs)) { 131 x86_saved_state64_t *regs = saved_state64(tagged_regs); 132 code = regs->rax & SYSCALL_NUMBER_MASK; 133 /* 134 * Check for indirect system call... system call number 135 * passed as 'arg0' 136 */ 137 if (code == 0) { 138 code = regs->rdi; 139 } 140 } else { 141 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK; 142 143 if (code == 0) { 144 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int)); 145 code = fuword(params); 146 } 147 } 148 } 149#else 150#error Unknown Architecture 151#endif 152 153 // Bounds "check" the value of code a la unix_syscall 154 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code]; 155 156 if ((id = sy->stsy_entry) != DTRACE_IDNONE) { 157 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 158 if (uthread) 159 uthread->t_dtrace_syscall_args = (void *)ip; 160 161 if (ip) 162 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); 163 else 164 (*systrace_probe)(id, 0, 0, 0, 0, 0); 165 166 if (uthread) 167 uthread->t_dtrace_syscall_args = (void *)0; 168 } 169 170#if 0 /* XXX */ 171 /* 172 * We want to explicitly allow DTrace consumers to stop a process 173 * before it actually executes the meat of the syscall. 174 */ 175 p = ttoproc(curthread); 176 mutex_enter(&p->p_lock); 177 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { 178 curthread->t_dtrace_stop = 0; 179 stop(PR_REQUESTED, 0); 180 } 181 mutex_exit(&p->p_lock); 182#endif 183 184 rval = (*sy->stsy_underlying)(pp, uap, rv); 185 186 if ((id = sy->stsy_return) != DTRACE_IDNONE) { 187 uint64_t munged_rv0, munged_rv1; 188 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 189 190 if (uthread) 191 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ 192 193 /* 194 * "Decode" rv for use in the call to dtrace_probe() 195 */ 196 if (rval == ERESTART) { 197 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ 198 munged_rv1 = -1LL; 199 } else if (rval != EJUSTRETURN) { 200 if (rval) { 201 munged_rv0 = -1LL; /* Mimic what libc will do. */ 202 munged_rv1 = -1LL; 203 } else { 204 switch (sy->stsy_return_type) { 205 case _SYSCALL_RET_INT_T: 206 munged_rv0 = rv[0]; 207 munged_rv1 = rv[1]; 208 break; 209 case _SYSCALL_RET_UINT_T: 210 munged_rv0 = ((u_int)rv[0]); 211 munged_rv1 = ((u_int)rv[1]); 212 break; 213 case _SYSCALL_RET_OFF_T: 214 case _SYSCALL_RET_UINT64_T: 215 munged_rv0 = *(u_int64_t *)rv; 216 munged_rv1 = 0LL; 217 break; 218 case _SYSCALL_RET_ADDR_T: 219 case _SYSCALL_RET_SIZE_T: 220 case _SYSCALL_RET_SSIZE_T: 221 munged_rv0 = *(user_addr_t *)rv; 222 munged_rv1 = 0LL; 223 break; 224 case _SYSCALL_RET_NONE: 225 munged_rv0 = 0LL; 226 munged_rv1 = 0LL; 227 break; 228 default: 229 munged_rv0 = 0LL; 230 munged_rv1 = 0LL; 231 break; 232 } 233 } 234 } else { 235 munged_rv0 = 0LL; 236 munged_rv1 = 0LL; 237 } 238 239 /* 240 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says: 241 * 242 * "This is a bit of an historical artifact. At first, the syscall provider just 243 * had its return value in arg0, and the fbt and pid providers had their return 244 * values in arg1 (so that we could use arg0 for the offset of the return site). 245 * 246 * We inevitably started writing scripts where we wanted to see the return 247 * values from probes in all three providers, and we made this script easier 248 * to write by replicating the syscall return values in arg1 to match fbt and 249 * pid. We debated briefly about removing the return value from arg0, but 250 * decided that it would be less confusing to have the same data in two places 251 * than to have some non-helpful, non-intuitive value in arg0. 252 * 253 * This change was made 4/23/2003 according to the DTrace project's putback log." 254 */ 255 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); 256 } 257 258 return (rval); 259} 260 261void 262dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) 263{ 264 systrace_sysent_t *sy; 265 dtrace_id_t id; 266 267 // Bounds "check" the value of code a la unix_syscall_return 268 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code]; 269 270 if ((id = sy->stsy_return) != DTRACE_IDNONE) { 271 uint64_t munged_rv0, munged_rv1; 272 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 273 274 if (uthread) 275 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ 276 277 /* 278 * "Decode" rv for use in the call to dtrace_probe() 279 */ 280 if (rval == ERESTART) { 281 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ 282 munged_rv1 = -1LL; 283 } else if (rval != EJUSTRETURN) { 284 if (rval) { 285 munged_rv0 = -1LL; /* Mimic what libc will do. */ 286 munged_rv1 = -1LL; 287 } else { 288 switch (sy->stsy_return_type) { 289 case _SYSCALL_RET_INT_T: 290 munged_rv0 = rv[0]; 291 munged_rv1 = rv[1]; 292 break; 293 case _SYSCALL_RET_UINT_T: 294 munged_rv0 = ((u_int)rv[0]); 295 munged_rv1 = ((u_int)rv[1]); 296 break; 297 case _SYSCALL_RET_OFF_T: 298 case _SYSCALL_RET_UINT64_T: 299 munged_rv0 = *(u_int64_t *)rv; 300 munged_rv1 = 0LL; 301 break; 302 case _SYSCALL_RET_ADDR_T: 303 case _SYSCALL_RET_SIZE_T: 304 case _SYSCALL_RET_SSIZE_T: 305 munged_rv0 = *(user_addr_t *)rv; 306 munged_rv1 = 0LL; 307 break; 308 case _SYSCALL_RET_NONE: 309 munged_rv0 = 0LL; 310 munged_rv1 = 0LL; 311 break; 312 default: 313 munged_rv0 = 0LL; 314 munged_rv1 = 0LL; 315 break; 316 } 317 } 318 } else { 319 munged_rv0 = 0LL; 320 munged_rv1 = 0LL; 321 } 322 323 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); 324 } 325} 326#endif /* __APPLE__ */ 327 328#define SYSTRACE_SHIFT 16 329#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) 330#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1)) 331#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id)) 332#define SYSTRACE_RETURN(id) (id) 333 334#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL) 335#error 1 << SYSTRACE_SHIFT must exceed number of system calls 336#endif 337 338static dev_info_t *systrace_devi; 339static dtrace_provider_id_t systrace_id; 340 341#if !defined (__APPLE__) 342static void 343systrace_init(struct sysent *actual, systrace_sysent_t **interposed) 344{ 345 systrace_sysent_t *sysent = *interposed; 346 int i; 347 348 if (sysent == NULL) { 349 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) * 350 NSYSCALL, KM_SLEEP); 351 } 352 353 for (i = 0; i < NSYSCALL; i++) { 354 struct sysent *a = &actual[i]; 355 systrace_sysent_t *s = &sysent[i]; 356 357 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) 358 continue; 359 360 if (a->sy_callc == dtrace_systrace_syscall) 361 continue; 362 363#ifdef _SYSCALL32_IMPL 364 if (a->sy_callc == dtrace_systrace_syscall32) 365 continue; 366#endif 367 368 s->stsy_underlying = a->sy_callc; 369 } 370} 371#else 372#define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */ 373static void 374systrace_init(struct sysent *actual, systrace_sysent_t **interposed) 375{ 376 377 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning 378 from bsd/sys/sysent.h */ 379 int i; 380 381 if (ssysent == NULL) { 382 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) * 383 NSYSCALL, KM_SLEEP); 384 } 385 386 for (i = 0; i < NSYSCALL; i++) { 387 struct sysent *a = &actual[i]; 388 systrace_sysent_t *s = &ssysent[i]; 389 390 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) 391 continue; 392 393 if (a->sy_callc == dtrace_systrace_syscall) 394 continue; 395 396#ifdef _SYSCALL32_IMPL 397 if (a->sy_callc == dtrace_systrace_syscall32) 398 continue; 399#endif 400 401 s->stsy_underlying = a->sy_callc; 402 s->stsy_return_type = a->sy_return_type; 403 } 404 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr); 405} 406 407#endif /* __APPLE__ */ 408 409/*ARGSUSED*/ 410static void 411systrace_provide(void *arg, const dtrace_probedesc_t *desc) 412{ 413#pragma unused(arg) /* __APPLE__ */ 414 int i; 415 416 if (desc != NULL) 417 return; 418 419 systrace_init(sysent, &systrace_sysent); 420#ifdef _SYSCALL32_IMPL 421 systrace_init(sysent32, &systrace_sysent32); 422#endif 423 424 for (i = 0; i < NSYSCALL; i++) { 425 if (systrace_sysent[i].stsy_underlying == NULL) 426 continue; 427 428 if (dtrace_probe_lookup(systrace_id, NULL, 429 syscallnames[i], "entry") != 0) 430 continue; 431 432 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], 433 "entry", SYSTRACE_ARTIFICIAL_FRAMES, 434 (void *)((uintptr_t)SYSTRACE_ENTRY(i))); 435 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], 436 "return", SYSTRACE_ARTIFICIAL_FRAMES, 437 (void *)((uintptr_t)SYSTRACE_RETURN(i))); 438 439 systrace_sysent[i].stsy_entry = DTRACE_IDNONE; 440 systrace_sysent[i].stsy_return = DTRACE_IDNONE; 441#ifdef _SYSCALL32_IMPL 442 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE; 443 systrace_sysent32[i].stsy_return = DTRACE_IDNONE; 444#endif 445 } 446} 447#if defined(__APPLE__) 448#undef systrace_init 449#endif 450 451/*ARGSUSED*/ 452static void 453systrace_destroy(void *arg, dtrace_id_t id, void *parg) 454{ 455#pragma unused(arg,id) /* __APPLE__ */ 456 457 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 458 459#pragma unused(sysnum) /* __APPLE__ */ 460 /* 461 * There's nothing to do here but assert that we have actually been 462 * disabled. 463 */ 464 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 465 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); 466#ifdef _SYSCALL32_IMPL 467 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE); 468#endif 469 } else { 470 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 471#ifdef _SYSCALL32_IMPL 472 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE); 473#endif 474 } 475} 476 477/*ARGSUSED*/ 478static int 479systrace_enable(void *arg, dtrace_id_t id, void *parg) 480{ 481#pragma unused(arg) /* __APPLE__ */ 482 483 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 484 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || 485 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); 486 487 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 488 systrace_sysent[sysnum].stsy_entry = id; 489#ifdef _SYSCALL32_IMPL 490 systrace_sysent32[sysnum].stsy_entry = id; 491#endif 492 } else { 493 systrace_sysent[sysnum].stsy_return = id; 494#ifdef _SYSCALL32_IMPL 495 systrace_sysent32[sysnum].stsy_return = id; 496#endif 497 } 498 499 if (enabled) { 500 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall); 501 return(0); 502 } 503#ifdef _SYSCALL32_IMPL 504 (void) casptr(&sysent32[sysnum].sy_callc, 505 (void *)systrace_sysent32[sysnum].stsy_underlying, 506 (void *)dtrace_systrace_syscall32); 507#endif 508 509 lck_mtx_lock(&dtrace_systrace_lock); 510 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) { 511 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall; 512 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t)); 513 } 514 lck_mtx_unlock(&dtrace_systrace_lock); 515 return (0); 516} 517 518/*ARGSUSED*/ 519static void 520systrace_disable(void *arg, dtrace_id_t id, void *parg) 521{ 522#pragma unused(arg,id) /* __APPLE__ */ 523 524 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 525 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || 526 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 527 528 if (disable) { 529 lck_mtx_lock(&dtrace_systrace_lock); 530 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) 531 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying)); 532 lck_mtx_unlock(&dtrace_systrace_lock); 533 534#ifdef _SYSCALL32_IMPL 535 (void) casptr(&sysent32[sysnum].sy_callc, 536 (void *)dtrace_systrace_syscall32, 537 (void *)systrace_sysent32[sysnum].stsy_underlying); 538#endif 539 } 540 541 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 542 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; 543#ifdef _SYSCALL32_IMPL 544 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE; 545#endif 546 } else { 547 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; 548#ifdef _SYSCALL32_IMPL 549 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE; 550#endif 551 } 552} 553 554static dtrace_pattr_t systrace_attr = { 555{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 556{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 557{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 558{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 559{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 560}; 561 562static dtrace_pops_t systrace_pops = { 563 systrace_provide, 564 NULL, 565 systrace_enable, 566 systrace_disable, 567 NULL, 568 NULL, 569 NULL, 570 systrace_getarg, 571 NULL, 572 systrace_destroy 573}; 574 575static int 576systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 577{ 578 switch (cmd) { 579 case DDI_ATTACH: 580 break; 581 case DDI_RESUME: 582 return (DDI_SUCCESS); 583 default: 584 return (DDI_FAILURE); 585 } 586 587#if !defined(__APPLE__) 588 systrace_probe = (void (*)())dtrace_probe; 589 membar_enter(); 590 591 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, 592 DDI_PSEUDO, NULL) == DDI_FAILURE || 593 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, 594 &systrace_pops, NULL, &systrace_id) != 0) { 595 systrace_probe = systrace_stub; 596 ddi_remove_minor_node(devi, NULL); 597 return (DDI_FAILURE); 598 } 599#else 600 systrace_probe = (void(*))&dtrace_probe; 601 membar_enter(); 602 603 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, 604 DDI_PSEUDO, 0) == DDI_FAILURE || 605 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, 606 &systrace_pops, NULL, &systrace_id) != 0) { 607 systrace_probe = systrace_stub; 608 ddi_remove_minor_node(devi, NULL); 609 return (DDI_FAILURE); 610 } 611#endif /* __APPLE__ */ 612 613 ddi_report_dev(devi); 614 systrace_devi = devi; 615 616 return (DDI_SUCCESS); 617} 618 619#if !defined(__APPLE__) 620static int 621systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 622{ 623 switch (cmd) { 624 case DDI_DETACH: 625 break; 626 case DDI_SUSPEND: 627 return (DDI_SUCCESS); 628 default: 629 return (DDI_FAILURE); 630 } 631 632 if (dtrace_unregister(systrace_id) != 0) 633 return (DDI_FAILURE); 634 635 ddi_remove_minor_node(devi, NULL); 636 systrace_probe = systrace_stub; 637 return (DDI_SUCCESS); 638} 639 640/*ARGSUSED*/ 641static int 642systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 643{ 644 int error; 645 646 switch (infocmd) { 647 case DDI_INFO_DEVT2DEVINFO: 648 *result = (void *)systrace_devi; 649 error = DDI_SUCCESS; 650 break; 651 case DDI_INFO_DEVT2INSTANCE: 652 *result = (void *)0; 653 error = DDI_SUCCESS; 654 break; 655 default: 656 error = DDI_FAILURE; 657 } 658 return (error); 659} 660 661/*ARGSUSED*/ 662static int 663systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 664{ 665 return (0); 666} 667 668static struct cb_ops systrace_cb_ops = { 669 systrace_open, /* open */ 670 nodev, /* close */ 671 nulldev, /* strategy */ 672 nulldev, /* print */ 673 nodev, /* dump */ 674 nodev, /* read */ 675 nodev, /* write */ 676 nodev, /* ioctl */ 677 nodev, /* devmap */ 678 nodev, /* mmap */ 679 nodev, /* segmap */ 680 nochpoll, /* poll */ 681 ddi_prop_op, /* cb_prop_op */ 682 0, /* streamtab */ 683 D_NEW | D_MP /* Driver compatibility flag */ 684}; 685 686static struct dev_ops systrace_ops = { 687 DEVO_REV, /* devo_rev, */ 688 0, /* refcnt */ 689 systrace_info, /* get_dev_info */ 690 nulldev, /* identify */ 691 nulldev, /* probe */ 692 systrace_attach, /* attach */ 693 systrace_detach, /* detach */ 694 nodev, /* reset */ 695 &systrace_cb_ops, /* driver operations */ 696 NULL, /* bus operations */ 697 nodev /* dev power */ 698}; 699 700/* 701 * Module linkage information for the kernel. 702 */ 703static struct modldrv modldrv = { 704 &mod_driverops, /* module type (this is a pseudo driver) */ 705 "System Call Tracing", /* name of module */ 706 &systrace_ops, /* driver ops */ 707}; 708 709static struct modlinkage modlinkage = { 710 MODREV_1, 711 (void *)&modldrv, 712 NULL 713}; 714 715int 716_init(void) 717{ 718 return (mod_install(&modlinkage)); 719} 720 721int 722_info(struct modinfo *modinfop) 723{ 724 return (mod_info(&modlinkage, modinfop)); 725} 726 727int 728_fini(void) 729{ 730 return (mod_remove(&modlinkage)); 731} 732#else 733typedef kern_return_t (*mach_call_t)(void *); 734 735/* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */ 736typedef void mach_munge_t(const void *, void *); 737 738typedef struct { 739 int mach_trap_arg_count; 740 kern_return_t (*mach_trap_function)(void *); 741#if defined(__x86_64__) 742 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */ 743#endif 744 int mach_trap_u32_words; 745#if MACH_ASSERT 746 const char* mach_trap_name; 747#endif /* MACH_ASSERT */ 748} mach_trap_t; 749 750extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */ 751extern int mach_trap_count; 752 753extern const char *mach_syscall_name_table[]; 754 755/* XXX From osfmk/i386/bsd_i386.c */ 756struct mach_call_args { 757 syscall_arg_t arg1; 758 syscall_arg_t arg2; 759 syscall_arg_t arg3; 760 syscall_arg_t arg4; 761 syscall_arg_t arg5; 762 syscall_arg_t arg6; 763 syscall_arg_t arg7; 764 syscall_arg_t arg8; 765 syscall_arg_t arg9; 766}; 767 768#undef NSYSCALL 769#define NSYSCALL mach_trap_count 770 771#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL) 772#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps 773#endif 774 775typedef struct machtrace_sysent { 776 dtrace_id_t stsy_entry; 777 dtrace_id_t stsy_return; 778 kern_return_t (*stsy_underlying)(void *); 779 int32_t stsy_return_type; 780} machtrace_sysent_t; 781 782static machtrace_sysent_t *machtrace_sysent = NULL; 783 784void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t, 785 uint64_t, uint64_t, uint64_t); 786 787static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int); 788 789static dev_info_t *machtrace_devi; 790static dtrace_provider_id_t machtrace_id; 791 792static kern_return_t 793dtrace_machtrace_syscall(struct mach_call_args *args) 794{ 795 int code; /* The mach call number */ 796 797 machtrace_sysent_t *sy; 798 dtrace_id_t id; 799 kern_return_t rval; 800#if 0 /* XXX */ 801 proc_t *p; 802#endif 803 syscall_arg_t *ip = (syscall_arg_t *)args; 804 mach_call_t mach_call; 805 806#if defined (__x86_64__) 807 { 808 pal_register_cache_state(current_thread(), VALID); 809 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); 810 811 if (is_saved_state64(tagged_regs)) { 812 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK; 813 } else { 814 code = -saved_state32(tagged_regs)->eax; 815 } 816 } 817#else 818#error Unknown Architecture 819#endif 820 821 sy = &machtrace_sysent[code]; 822 823 if ((id = sy->stsy_entry) != DTRACE_IDNONE) { 824 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 825 826 if (uthread) 827 uthread->t_dtrace_syscall_args = (void *)ip; 828 829 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); 830 831 if (uthread) 832 uthread->t_dtrace_syscall_args = (void *)0; 833 } 834 835#if 0 /* XXX */ 836 /* 837 * We want to explicitly allow DTrace consumers to stop a process 838 * before it actually executes the meat of the syscall. 839 */ 840 p = ttoproc(curthread); 841 mutex_enter(&p->p_lock); 842 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { 843 curthread->t_dtrace_stop = 0; 844 stop(PR_REQUESTED, 0); 845 } 846 mutex_exit(&p->p_lock); 847#endif 848 849 mach_call = (mach_call_t)(*sy->stsy_underlying); 850 rval = mach_call(args); 851 852 if ((id = sy->stsy_return) != DTRACE_IDNONE) 853 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0); 854 855 return (rval); 856} 857 858static void 859machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed) 860{ 861 machtrace_sysent_t *msysent = *interposed; 862 int i; 863 864 if (msysent == NULL) { 865 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) * 866 NSYSCALL, KM_SLEEP); 867 } 868 869 for (i = 0; i < NSYSCALL; i++) { 870 const mach_trap_t *a = &actual[i]; 871 machtrace_sysent_t *s = &msysent[i]; 872 873 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) 874 continue; 875 876 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) 877 continue; 878 879 s->stsy_underlying = a->mach_trap_function; 880 } 881} 882 883/*ARGSUSED*/ 884static void 885machtrace_provide(void *arg, const dtrace_probedesc_t *desc) 886{ 887#pragma unused(arg) /* __APPLE__ */ 888 889 int i; 890 891 if (desc != NULL) 892 return; 893 894 machtrace_init(mach_trap_table, &machtrace_sysent); 895 896 for (i = 0; i < NSYSCALL; i++) { 897 898 if (machtrace_sysent[i].stsy_underlying == NULL) 899 continue; 900 901 if (dtrace_probe_lookup(machtrace_id, NULL, 902 mach_syscall_name_table[i], "entry") != 0) 903 continue; 904 905 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], 906 "entry", MACHTRACE_ARTIFICIAL_FRAMES, 907 (void *)((uintptr_t)SYSTRACE_ENTRY(i))); 908 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], 909 "return", MACHTRACE_ARTIFICIAL_FRAMES, 910 (void *)((uintptr_t)SYSTRACE_RETURN(i))); 911 912 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE; 913 machtrace_sysent[i].stsy_return = DTRACE_IDNONE; 914 } 915} 916 917/*ARGSUSED*/ 918static void 919machtrace_destroy(void *arg, dtrace_id_t id, void *parg) 920{ 921#pragma unused(arg,id) /* __APPLE__ */ 922 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 923 924#pragma unused(sysnum) /* __APPLE__ */ 925 926 /* 927 * There's nothing to do here but assert that we have actually been 928 * disabled. 929 */ 930 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 931 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); 932 } else { 933 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 934 } 935} 936 937/*ARGSUSED*/ 938static int 939machtrace_enable(void *arg, dtrace_id_t id, void *parg) 940{ 941#pragma unused(arg) /* __APPLE__ */ 942 943 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 944 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || 945 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); 946 947 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 948 machtrace_sysent[sysnum].stsy_entry = id; 949 } else { 950 machtrace_sysent[sysnum].stsy_return = id; 951 } 952 953 if (enabled) { 954 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall); 955 return(0); 956 } 957 958 lck_mtx_lock(&dtrace_systrace_lock); 959 960 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) { 961 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall; 962 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t)); 963 } 964 965 lck_mtx_unlock(&dtrace_systrace_lock); 966 967 return(0); 968} 969 970/*ARGSUSED*/ 971static void 972machtrace_disable(void *arg, dtrace_id_t id, void *parg) 973{ 974#pragma unused(arg,id) /* __APPLE__ */ 975 976 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 977 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || 978 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 979 980 if (disable) { 981 982 lck_mtx_lock(&dtrace_systrace_lock); 983 984 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) { 985 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t)); 986 } 987 lck_mtx_unlock(&dtrace_systrace_lock); 988 } 989 990 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 991 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; 992 } else { 993 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; 994 } 995} 996 997static dtrace_pattr_t machtrace_attr = { 998{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 999{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 1000{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 1001{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 1002{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 1003}; 1004 1005static dtrace_pops_t machtrace_pops = { 1006 machtrace_provide, 1007 NULL, 1008 machtrace_enable, 1009 machtrace_disable, 1010 NULL, 1011 NULL, 1012 NULL, 1013 machtrace_getarg, 1014 NULL, 1015 machtrace_destroy 1016}; 1017 1018static int 1019machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 1020{ 1021 switch (cmd) { 1022 case DDI_ATTACH: 1023 break; 1024 case DDI_RESUME: 1025 return (DDI_SUCCESS); 1026 default: 1027 return (DDI_FAILURE); 1028 } 1029 1030#if !defined(__APPLE__) 1031 machtrace_probe = (void (*)())dtrace_probe; 1032 membar_enter(); 1033 1034 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, 1035 DDI_PSEUDO, NULL) == DDI_FAILURE || 1036 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, 1037 &machtrace_pops, NULL, &machtrace_id) != 0) { 1038 machtrace_probe = systrace_stub; 1039#else 1040 machtrace_probe = dtrace_probe; 1041 membar_enter(); 1042 1043 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, 1044 DDI_PSEUDO, 0) == DDI_FAILURE || 1045 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, 1046 &machtrace_pops, NULL, &machtrace_id) != 0) { 1047 machtrace_probe = (void (*))&systrace_stub; 1048#endif /* __APPLE__ */ 1049 ddi_remove_minor_node(devi, NULL); 1050 return (DDI_FAILURE); 1051 } 1052 1053 ddi_report_dev(devi); 1054 machtrace_devi = devi; 1055 1056 return (DDI_SUCCESS); 1057} 1058 1059d_open_t _systrace_open; 1060 1061int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p) 1062{ 1063#pragma unused(dev,flags,devtype,p) 1064 return 0; 1065} 1066 1067#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */ 1068 1069/* 1070 * A struct describing which functions will get invoked for certain 1071 * actions. 1072 */ 1073static struct cdevsw systrace_cdevsw = 1074{ 1075 _systrace_open, /* open */ 1076 eno_opcl, /* close */ 1077 eno_rdwrt, /* read */ 1078 eno_rdwrt, /* write */ 1079 eno_ioctl, /* ioctl */ 1080 (stop_fcn_t *)nulldev, /* stop */ 1081 (reset_fcn_t *)nulldev, /* reset */ 1082 NULL, /* tty's */ 1083 eno_select, /* select */ 1084 eno_mmap, /* mmap */ 1085 eno_strat, /* strategy */ 1086 eno_getc, /* getc */ 1087 eno_putc, /* putc */ 1088 0 /* type */ 1089}; 1090 1091static int gSysTraceInited = 0; 1092 1093void systrace_init( void ); 1094 1095void systrace_init( void ) 1096{ 1097 if (0 == gSysTraceInited) { 1098 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw); 1099 1100 if (majdevno < 0) { 1101 printf("systrace_init: failed to allocate a major number!\n"); 1102 gSysTraceInited = 0; 1103 return; 1104 } 1105 1106 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); 1107 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); 1108 1109 gSysTraceInited = 1; 1110 } else 1111 panic("systrace_init: called twice!\n"); 1112} 1113#undef SYSTRACE_MAJOR 1114#endif /* __APPLE__ */ 1115 1116static uint64_t 1117systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) 1118{ 1119#pragma unused(arg,id,parg,aframes) /* __APPLE__ */ 1120 uint64_t val = 0; 1121 syscall_arg_t *stack = (syscall_arg_t *)NULL; 1122 1123 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 1124 1125 if (uthread) 1126 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args; 1127 1128 if (!stack) 1129 return(0); 1130 1131 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 1132 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */ 1133 val = (uint64_t)*(stack+argno); 1134 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 1135 return (val); 1136} 1137 1138 1139static uint64_t 1140machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) 1141{ 1142#pragma unused(arg,id,parg,aframes) /* __APPLE__ */ 1143 uint64_t val = 0; 1144 syscall_arg_t *stack = (syscall_arg_t *)NULL; 1145 1146 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 1147 1148 if (uthread) 1149 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args; 1150 1151 if (!stack) 1152 return(0); 1153 1154 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 1155 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */ 1156 val = (uint64_t)*(stack+argno); 1157 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 1158 return (val); 1159} 1160 1161