1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26/* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */ 27 28#ifdef KERNEL 29#ifndef _KERNEL 30#define _KERNEL /* Solaris vs. Darwin */ 31#endif 32#endif 33 34#include <kern/thread.h> 35#include <mach/thread_status.h> 36 37/* XXX All of these should really be derived from syscall_sw.h */ 38#if defined (__x86_64__) 39#define SYSCALL_CLASS_SHIFT 24 40#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT) 41#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK) 42#define I386_SYSCALL_NUMBER_MASK (0xFFFF) 43#endif 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/proc.h> 48#include <sys/errno.h> 49#include <sys/ioctl.h> 50#include <sys/conf.h> 51#include <sys/fcntl.h> 52#include <miscfs/devfs/devfs.h> 53 54#include <sys/dtrace.h> 55#include <sys/dtrace_impl.h> 56#include "systrace.h" 57#include <sys/stat.h> 58#include <sys/systm.h> 59#include <sys/conf.h> 60#include <sys/user.h> 61 62#include <machine/pal_routines.h> 63 64#if defined (__x86_64__) 65#define SYSTRACE_ARTIFICIAL_FRAMES 2 66#define MACHTRACE_ARTIFICIAL_FRAMES 3 67#else 68#error Unknown Architecture 69#endif 70 71#include <sys/sysent.h> 72#define sy_callc sy_call /* Map Solaris slot name to Darwin's */ 73#define NSYSCALL nsysent /* and is less than 500 or so */ 74 75extern const char *syscallnames[]; 76 77#include <sys/dtrace_glue.h> 78#define casptr dtrace_casptr 79#define membar_enter dtrace_membar_producer 80 81#define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */ 82#define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */ 83 84extern lck_attr_t* dtrace_lck_attr; 85extern lck_grp_t* dtrace_lck_grp; 86static lck_mtx_t dtrace_systrace_lock; /* probe state lock */ 87 88systrace_sysent_t *systrace_sysent = NULL; 89void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); 90 91static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int); 92 93void 94systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1, 95 uint64_t arg2, uint64_t arg3, uint64_t arg4) 96{ 97#pragma unused(id,arg0,arg1,arg2,arg3,arg4) 98} 99 100int32_t 101dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) 102{ 103 unsigned short code; /* The system call number */ 104 105 systrace_sysent_t *sy; 106 dtrace_id_t id; 107 int32_t rval; 108#if 0 /* XXX */ 109 proc_t *p; 110#endif 111 syscall_arg_t *ip = (syscall_arg_t *)uap; 112 113#if defined (__x86_64__) 114 { 115 pal_register_cache_state(current_thread(), VALID); 116 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); 117 118 if (is_saved_state64(tagged_regs)) { 119 x86_saved_state64_t *regs = saved_state64(tagged_regs); 120 code = regs->rax & SYSCALL_NUMBER_MASK; 121 /* 122 * Check for indirect system call... system call number 123 * passed as 'arg0' 124 */ 125 if (code == 0) { 126 code = regs->rdi; 127 } 128 } else { 129 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK; 130 131 if (code == 0) { 132 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int)); 133 code = fuword(params); 134 } 135 } 136 } 137#else 138#error Unknown Architecture 139#endif 140 141 // Bounds "check" the value of code a la unix_syscall 142 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code]; 143 144 if ((id = sy->stsy_entry) != DTRACE_IDNONE) { 145 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 146 if (uthread) 147 uthread->t_dtrace_syscall_args = (void *)ip; 148 149 if (ip) 150 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); 151 else 152 (*systrace_probe)(id, 0, 0, 0, 0, 0); 153 154 if (uthread) 155 uthread->t_dtrace_syscall_args = (void *)0; 156 } 157 158#if 0 /* XXX */ 159 /* 160 * APPLE NOTE: Not implemented. 161 * We want to explicitly allow DTrace consumers to stop a process 162 * before it actually executes the meat of the syscall. 163 */ 164 p = ttoproc(curthread); 165 mutex_enter(&p->p_lock); 166 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { 167 curthread->t_dtrace_stop = 0; 168 stop(PR_REQUESTED, 0); 169 } 170 mutex_exit(&p->p_lock); 171#endif 172 173 rval = (*sy->stsy_underlying)(pp, uap, rv); 174 175 if ((id = sy->stsy_return) != DTRACE_IDNONE) { 176 uint64_t munged_rv0, munged_rv1; 177 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 178 179 if (uthread) 180 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ 181 182 /* 183 * "Decode" rv for use in the call to dtrace_probe() 184 */ 185 if (rval == ERESTART) { 186 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ 187 munged_rv1 = -1LL; 188 } else if (rval != EJUSTRETURN) { 189 if (rval) { 190 munged_rv0 = -1LL; /* Mimic what libc will do. */ 191 munged_rv1 = -1LL; 192 } else { 193 switch (sy->stsy_return_type) { 194 case _SYSCALL_RET_INT_T: 195 munged_rv0 = rv[0]; 196 munged_rv1 = rv[1]; 197 break; 198 case _SYSCALL_RET_UINT_T: 199 munged_rv0 = ((u_int)rv[0]); 200 munged_rv1 = ((u_int)rv[1]); 201 break; 202 case _SYSCALL_RET_OFF_T: 203 case _SYSCALL_RET_UINT64_T: 204 munged_rv0 = *(u_int64_t *)rv; 205 munged_rv1 = 0LL; 206 break; 207 case _SYSCALL_RET_ADDR_T: 208 case _SYSCALL_RET_SIZE_T: 209 case _SYSCALL_RET_SSIZE_T: 210 munged_rv0 = *(user_addr_t *)rv; 211 munged_rv1 = 0LL; 212 break; 213 case _SYSCALL_RET_NONE: 214 munged_rv0 = 0LL; 215 munged_rv1 = 0LL; 216 break; 217 default: 218 munged_rv0 = 0LL; 219 munged_rv1 = 0LL; 220 break; 221 } 222 } 223 } else { 224 munged_rv0 = 0LL; 225 munged_rv1 = 0LL; 226 } 227 228 /* 229 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says: 230 * 231 * "This is a bit of an historical artifact. At first, the syscall provider just 232 * had its return value in arg0, and the fbt and pid providers had their return 233 * values in arg1 (so that we could use arg0 for the offset of the return site). 234 * 235 * We inevitably started writing scripts where we wanted to see the return 236 * values from probes in all three providers, and we made this script easier 237 * to write by replicating the syscall return values in arg1 to match fbt and 238 * pid. We debated briefly about removing the return value from arg0, but 239 * decided that it would be less confusing to have the same data in two places 240 * than to have some non-helpful, non-intuitive value in arg0. 241 * 242 * This change was made 4/23/2003 according to the DTrace project's putback log." 243 */ 244 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); 245 } 246 247 return (rval); 248} 249 250void 251dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) 252{ 253 systrace_sysent_t *sy; 254 dtrace_id_t id; 255 256 // Bounds "check" the value of code a la unix_syscall_return 257 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code]; 258 259 if ((id = sy->stsy_return) != DTRACE_IDNONE) { 260 uint64_t munged_rv0, munged_rv1; 261 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 262 263 if (uthread) 264 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */ 265 266 /* 267 * "Decode" rv for use in the call to dtrace_probe() 268 */ 269 if (rval == ERESTART) { 270 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ 271 munged_rv1 = -1LL; 272 } else if (rval != EJUSTRETURN) { 273 if (rval) { 274 munged_rv0 = -1LL; /* Mimic what libc will do. */ 275 munged_rv1 = -1LL; 276 } else { 277 switch (sy->stsy_return_type) { 278 case _SYSCALL_RET_INT_T: 279 munged_rv0 = rv[0]; 280 munged_rv1 = rv[1]; 281 break; 282 case _SYSCALL_RET_UINT_T: 283 munged_rv0 = ((u_int)rv[0]); 284 munged_rv1 = ((u_int)rv[1]); 285 break; 286 case _SYSCALL_RET_OFF_T: 287 case _SYSCALL_RET_UINT64_T: 288 munged_rv0 = *(u_int64_t *)rv; 289 munged_rv1 = 0LL; 290 break; 291 case _SYSCALL_RET_ADDR_T: 292 case _SYSCALL_RET_SIZE_T: 293 case _SYSCALL_RET_SSIZE_T: 294 munged_rv0 = *(user_addr_t *)rv; 295 munged_rv1 = 0LL; 296 break; 297 case _SYSCALL_RET_NONE: 298 munged_rv0 = 0LL; 299 munged_rv1 = 0LL; 300 break; 301 default: 302 munged_rv0 = 0LL; 303 munged_rv1 = 0LL; 304 break; 305 } 306 } 307 } else { 308 munged_rv0 = 0LL; 309 munged_rv1 = 0LL; 310 } 311 312 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0); 313 } 314} 315 316#define SYSTRACE_SHIFT 16 317#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) 318#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1)) 319#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id)) 320#define SYSTRACE_RETURN(id) (id) 321 322#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL) 323#error 1 << SYSTRACE_SHIFT must exceed number of system calls 324#endif 325 326static dev_info_t *systrace_devi; 327static dtrace_provider_id_t systrace_id; 328 329/* 330 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol. 331 * See balanced undef below. 332 */ 333#define systrace_init _systrace_init 334 335static void 336systrace_init(struct sysent *actual, systrace_sysent_t **interposed) 337{ 338 339 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning 340 from bsd/sys/sysent.h */ 341 int i; 342 343 if (ssysent == NULL) { 344 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) * 345 NSYSCALL, KM_SLEEP); 346 } 347 348 for (i = 0; i < NSYSCALL; i++) { 349 struct sysent *a = &actual[i]; 350 systrace_sysent_t *s = &ssysent[i]; 351 352 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) 353 continue; 354 355 if (a->sy_callc == dtrace_systrace_syscall) 356 continue; 357 358#ifdef _SYSCALL32_IMPL 359 if (a->sy_callc == dtrace_systrace_syscall32) 360 continue; 361#endif 362 363 s->stsy_underlying = a->sy_callc; 364 s->stsy_return_type = a->sy_return_type; 365 } 366 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr); 367} 368 369 370/*ARGSUSED*/ 371static void 372systrace_provide(void *arg, const dtrace_probedesc_t *desc) 373{ 374#pragma unused(arg) /* __APPLE__ */ 375 int i; 376 377 if (desc != NULL) 378 return; 379 380 systrace_init(sysent, &systrace_sysent); 381#ifdef _SYSCALL32_IMPL 382 systrace_init(sysent32, &systrace_sysent32); 383#endif 384 385 for (i = 0; i < NSYSCALL; i++) { 386 if (systrace_sysent[i].stsy_underlying == NULL) 387 continue; 388 389 if (dtrace_probe_lookup(systrace_id, NULL, 390 syscallnames[i], "entry") != 0) 391 continue; 392 393 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], 394 "entry", SYSTRACE_ARTIFICIAL_FRAMES, 395 (void *)((uintptr_t)SYSTRACE_ENTRY(i))); 396 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], 397 "return", SYSTRACE_ARTIFICIAL_FRAMES, 398 (void *)((uintptr_t)SYSTRACE_RETURN(i))); 399 400 systrace_sysent[i].stsy_entry = DTRACE_IDNONE; 401 systrace_sysent[i].stsy_return = DTRACE_IDNONE; 402#ifdef _SYSCALL32_IMPL 403 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE; 404 systrace_sysent32[i].stsy_return = DTRACE_IDNONE; 405#endif 406 } 407} 408#undef systrace_init 409 410/*ARGSUSED*/ 411static void 412systrace_destroy(void *arg, dtrace_id_t id, void *parg) 413{ 414#pragma unused(arg,id) /* __APPLE__ */ 415 416 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 417 418#pragma unused(sysnum) /* __APPLE__ */ 419 /* 420 * There's nothing to do here but assert that we have actually been 421 * disabled. 422 */ 423 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 424 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); 425#ifdef _SYSCALL32_IMPL 426 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE); 427#endif 428 } else { 429 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 430#ifdef _SYSCALL32_IMPL 431 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE); 432#endif 433 } 434} 435 436/*ARGSUSED*/ 437static int 438systrace_enable(void *arg, dtrace_id_t id, void *parg) 439{ 440#pragma unused(arg) /* __APPLE__ */ 441 442 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 443 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || 444 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); 445 446 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 447 systrace_sysent[sysnum].stsy_entry = id; 448#ifdef _SYSCALL32_IMPL 449 systrace_sysent32[sysnum].stsy_entry = id; 450#endif 451 } else { 452 systrace_sysent[sysnum].stsy_return = id; 453#ifdef _SYSCALL32_IMPL 454 systrace_sysent32[sysnum].stsy_return = id; 455#endif 456 } 457 458 if (enabled) { 459 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall); 460 return(0); 461 } 462#ifdef _SYSCALL32_IMPL 463 (void) casptr(&sysent32[sysnum].sy_callc, 464 (void *)systrace_sysent32[sysnum].stsy_underlying, 465 (void *)dtrace_systrace_syscall32); 466#endif 467 468 lck_mtx_lock(&dtrace_systrace_lock); 469 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) { 470 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall; 471 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t)); 472 } 473 lck_mtx_unlock(&dtrace_systrace_lock); 474 return (0); 475} 476 477/*ARGSUSED*/ 478static void 479systrace_disable(void *arg, dtrace_id_t id, void *parg) 480{ 481#pragma unused(arg,id) /* __APPLE__ */ 482 483 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 484 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || 485 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 486 487 if (disable) { 488 lck_mtx_lock(&dtrace_systrace_lock); 489 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall) 490 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying)); 491 lck_mtx_unlock(&dtrace_systrace_lock); 492 493#ifdef _SYSCALL32_IMPL 494 (void) casptr(&sysent32[sysnum].sy_callc, 495 (void *)dtrace_systrace_syscall32, 496 (void *)systrace_sysent32[sysnum].stsy_underlying); 497#endif 498 } 499 500 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 501 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; 502#ifdef _SYSCALL32_IMPL 503 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE; 504#endif 505 } else { 506 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; 507#ifdef _SYSCALL32_IMPL 508 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE; 509#endif 510 } 511} 512 513static dtrace_pattr_t systrace_attr = { 514{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 515{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 516{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 517{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 518{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 519}; 520 521static dtrace_pops_t systrace_pops = { 522 systrace_provide, 523 NULL, 524 systrace_enable, 525 systrace_disable, 526 NULL, 527 NULL, 528 NULL, 529 systrace_getarg, 530 NULL, 531 systrace_destroy 532}; 533 534static int 535systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 536{ 537 switch (cmd) { 538 case DDI_ATTACH: 539 break; 540 case DDI_RESUME: 541 return (DDI_SUCCESS); 542 default: 543 return (DDI_FAILURE); 544 } 545 546 systrace_probe = (void(*))&dtrace_probe; 547 membar_enter(); 548 549 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, 550 DDI_PSEUDO, 0) == DDI_FAILURE || 551 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, 552 &systrace_pops, NULL, &systrace_id) != 0) { 553 systrace_probe = systrace_stub; 554 ddi_remove_minor_node(devi, NULL); 555 return (DDI_FAILURE); 556 } 557 558 ddi_report_dev(devi); 559 systrace_devi = devi; 560 561 return (DDI_SUCCESS); 562} 563 564 565/* 566 * APPLE NOTE: systrace_detach not implemented 567 */ 568#if !defined(__APPLE__) 569static int 570systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 571{ 572 switch (cmd) { 573 case DDI_DETACH: 574 break; 575 case DDI_SUSPEND: 576 return (DDI_SUCCESS); 577 default: 578 return (DDI_FAILURE); 579 } 580 581 if (dtrace_unregister(systrace_id) != 0) 582 return (DDI_FAILURE); 583 584 ddi_remove_minor_node(devi, NULL); 585 systrace_probe = systrace_stub; 586 return (DDI_SUCCESS); 587} 588#endif /* __APPLE__ */ 589 590 591typedef kern_return_t (*mach_call_t)(void *); 592 593/* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */ 594typedef void mach_munge_t(void *); 595 596typedef struct { 597 int mach_trap_arg_count; 598 kern_return_t (*mach_trap_function)(void *); 599#if defined(__arm64__) || defined(__x86_64__) 600 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */ 601#endif 602 int mach_trap_u32_words; 603#if MACH_ASSERT 604 const char* mach_trap_name; 605#endif /* MACH_ASSERT */ 606} mach_trap_t; 607 608extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */ 609extern int mach_trap_count; 610 611extern const char *mach_syscall_name_table[]; 612 613/* XXX From osfmk/i386/bsd_i386.c */ 614struct mach_call_args { 615 syscall_arg_t arg1; 616 syscall_arg_t arg2; 617 syscall_arg_t arg3; 618 syscall_arg_t arg4; 619 syscall_arg_t arg5; 620 syscall_arg_t arg6; 621 syscall_arg_t arg7; 622 syscall_arg_t arg8; 623 syscall_arg_t arg9; 624}; 625 626#undef NSYSCALL 627#define NSYSCALL mach_trap_count 628 629#if ((1 << SYSTRACE_SHIFT) <= NSYSCALL) 630#error 1 << SYSTRACE_SHIFT must exceed number of Mach traps 631#endif 632 633typedef struct machtrace_sysent { 634 dtrace_id_t stsy_entry; 635 dtrace_id_t stsy_return; 636 kern_return_t (*stsy_underlying)(void *); 637 int32_t stsy_return_type; 638} machtrace_sysent_t; 639 640static machtrace_sysent_t *machtrace_sysent = NULL; 641 642void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t, 643 uint64_t, uint64_t, uint64_t); 644 645static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int); 646 647static dev_info_t *machtrace_devi; 648static dtrace_provider_id_t machtrace_id; 649 650static kern_return_t 651dtrace_machtrace_syscall(struct mach_call_args *args) 652{ 653 int code; /* The mach call number */ 654 655 machtrace_sysent_t *sy; 656 dtrace_id_t id; 657 kern_return_t rval; 658#if 0 /* XXX */ 659 proc_t *p; 660#endif 661 syscall_arg_t *ip = (syscall_arg_t *)args; 662 mach_call_t mach_call; 663 664#if defined (__x86_64__) 665 { 666 pal_register_cache_state(current_thread(), VALID); 667 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); 668 669 if (is_saved_state64(tagged_regs)) { 670 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK; 671 } else { 672 code = -saved_state32(tagged_regs)->eax; 673 } 674 } 675#else 676#error Unknown Architecture 677#endif 678 679 sy = &machtrace_sysent[code]; 680 681 if ((id = sy->stsy_entry) != DTRACE_IDNONE) { 682 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 683 684 if (uthread) 685 uthread->t_dtrace_syscall_args = (void *)ip; 686 687 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); 688 689 if (uthread) 690 uthread->t_dtrace_syscall_args = (void *)0; 691 } 692 693#if 0 /* XXX */ 694 /* 695 * APPLE NOTE: Not implemented. 696 * We want to explicitly allow DTrace consumers to stop a process 697 * before it actually executes the meat of the syscall. 698 */ 699 p = ttoproc(curthread); 700 mutex_enter(&p->p_lock); 701 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) { 702 curthread->t_dtrace_stop = 0; 703 stop(PR_REQUESTED, 0); 704 } 705 mutex_exit(&p->p_lock); 706#endif 707 708 mach_call = (mach_call_t)(*sy->stsy_underlying); 709 rval = mach_call(args); 710 711 if ((id = sy->stsy_return) != DTRACE_IDNONE) 712 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0); 713 714 return (rval); 715} 716 717static void 718machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed) 719{ 720 machtrace_sysent_t *msysent = *interposed; 721 int i; 722 723 if (msysent == NULL) { 724 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) * 725 NSYSCALL, KM_SLEEP); 726 } 727 728 for (i = 0; i < NSYSCALL; i++) { 729 const mach_trap_t *a = &actual[i]; 730 machtrace_sysent_t *s = &msysent[i]; 731 732 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) 733 continue; 734 735 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) 736 continue; 737 738 s->stsy_underlying = a->mach_trap_function; 739 } 740} 741 742/*ARGSUSED*/ 743static void 744machtrace_provide(void *arg, const dtrace_probedesc_t *desc) 745{ 746#pragma unused(arg) /* __APPLE__ */ 747 748 int i; 749 750 if (desc != NULL) 751 return; 752 753 machtrace_init(mach_trap_table, &machtrace_sysent); 754 755 for (i = 0; i < NSYSCALL; i++) { 756 757 if (machtrace_sysent[i].stsy_underlying == NULL) 758 continue; 759 760 if (dtrace_probe_lookup(machtrace_id, NULL, 761 mach_syscall_name_table[i], "entry") != 0) 762 continue; 763 764 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], 765 "entry", MACHTRACE_ARTIFICIAL_FRAMES, 766 (void *)((uintptr_t)SYSTRACE_ENTRY(i))); 767 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], 768 "return", MACHTRACE_ARTIFICIAL_FRAMES, 769 (void *)((uintptr_t)SYSTRACE_RETURN(i))); 770 771 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE; 772 machtrace_sysent[i].stsy_return = DTRACE_IDNONE; 773 } 774} 775 776/*ARGSUSED*/ 777static void 778machtrace_destroy(void *arg, dtrace_id_t id, void *parg) 779{ 780#pragma unused(arg,id) /* __APPLE__ */ 781 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 782 783#pragma unused(sysnum) /* __APPLE__ */ 784 785 /* 786 * There's nothing to do here but assert that we have actually been 787 * disabled. 788 */ 789 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 790 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE); 791 } else { 792 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 793 } 794} 795 796/*ARGSUSED*/ 797static int 798machtrace_enable(void *arg, dtrace_id_t id, void *parg) 799{ 800#pragma unused(arg) /* __APPLE__ */ 801 802 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 803 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || 804 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); 805 806 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 807 machtrace_sysent[sysnum].stsy_entry = id; 808 } else { 809 machtrace_sysent[sysnum].stsy_return = id; 810 } 811 812 if (enabled) { 813 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall); 814 return(0); 815 } 816 817 lck_mtx_lock(&dtrace_systrace_lock); 818 819 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) { 820 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall; 821 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t)); 822 } 823 824 lck_mtx_unlock(&dtrace_systrace_lock); 825 826 return(0); 827} 828 829/*ARGSUSED*/ 830static void 831machtrace_disable(void *arg, dtrace_id_t id, void *parg) 832{ 833#pragma unused(arg,id) /* __APPLE__ */ 834 835 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); 836 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || 837 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); 838 839 if (disable) { 840 841 lck_mtx_lock(&dtrace_systrace_lock); 842 843 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) { 844 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t)); 845 } 846 lck_mtx_unlock(&dtrace_systrace_lock); 847 } 848 849 if (SYSTRACE_ISENTRY((uintptr_t)parg)) { 850 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE; 851 } else { 852 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE; 853 } 854} 855 856static dtrace_pattr_t machtrace_attr = { 857{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 858{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 859{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 860{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 861{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 862}; 863 864static dtrace_pops_t machtrace_pops = { 865 machtrace_provide, 866 NULL, 867 machtrace_enable, 868 machtrace_disable, 869 NULL, 870 NULL, 871 NULL, 872 machtrace_getarg, 873 NULL, 874 machtrace_destroy 875}; 876 877static int 878machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 879{ 880 switch (cmd) { 881 case DDI_ATTACH: 882 break; 883 case DDI_RESUME: 884 return (DDI_SUCCESS); 885 default: 886 return (DDI_FAILURE); 887 } 888 889 machtrace_probe = dtrace_probe; 890 membar_enter(); 891 892 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, 893 DDI_PSEUDO, 0) == DDI_FAILURE || 894 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, 895 &machtrace_pops, NULL, &machtrace_id) != 0) { 896 machtrace_probe = (void (*))&systrace_stub; 897 ddi_remove_minor_node(devi, NULL); 898 return (DDI_FAILURE); 899 } 900 901 ddi_report_dev(devi); 902 machtrace_devi = devi; 903 904 return (DDI_SUCCESS); 905} 906 907d_open_t _systrace_open; 908 909int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p) 910{ 911#pragma unused(dev,flags,devtype,p) 912 return 0; 913} 914 915#define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */ 916 917/* 918 * A struct describing which functions will get invoked for certain 919 * actions. 920 */ 921static struct cdevsw systrace_cdevsw = 922{ 923 _systrace_open, /* open */ 924 eno_opcl, /* close */ 925 eno_rdwrt, /* read */ 926 eno_rdwrt, /* write */ 927 eno_ioctl, /* ioctl */ 928 (stop_fcn_t *)nulldev, /* stop */ 929 (reset_fcn_t *)nulldev, /* reset */ 930 NULL, /* tty's */ 931 eno_select, /* select */ 932 eno_mmap, /* mmap */ 933 eno_strat, /* strategy */ 934 eno_getc, /* getc */ 935 eno_putc, /* putc */ 936 0 /* type */ 937}; 938 939static int gSysTraceInited = 0; 940 941void systrace_init( void ); 942 943void systrace_init( void ) 944{ 945 if (0 == gSysTraceInited) { 946 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw); 947 948 if (majdevno < 0) { 949 printf("systrace_init: failed to allocate a major number!\n"); 950 gSysTraceInited = 0; 951 return; 952 } 953 954 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); 955 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); 956 957 gSysTraceInited = 1; 958 } else 959 panic("systrace_init: called twice!\n"); 960} 961#undef SYSTRACE_MAJOR 962 963static uint64_t 964systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) 965{ 966#pragma unused(arg,id,parg,aframes) /* __APPLE__ */ 967 uint64_t val = 0; 968 syscall_arg_t *stack = (syscall_arg_t *)NULL; 969 970 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 971 972 if (uthread) 973 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args; 974 975 if (!stack) 976 return(0); 977 978 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 979 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */ 980 val = (uint64_t)*(stack+argno); 981 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 982 return (val); 983} 984 985 986static uint64_t 987machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) 988{ 989#pragma unused(arg,id,parg,aframes) /* __APPLE__ */ 990 uint64_t val = 0; 991 syscall_arg_t *stack = (syscall_arg_t *)NULL; 992 993 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 994 995 if (uthread) 996 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args; 997 998 if (!stack) 999 return(0); 1000 1001 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 1002 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */ 1003 val = (uint64_t)*(stack+argno); 1004 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 1005 return (val); 1006} 1007 1008