/*
 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/i386/thread_status.h */
#include <kern/thread.h>
#include <mach/thread_status.h>

typedef x86_saved_state_t savearea_t;

#include <stdarg.h>
#include <string.h>
#include <sys/malloc.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <libkern/OSAtomic.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <miscfs/devfs/devfs.h>
#include <mach/vm_param.h>

extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */

void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fault, int fltoffs, uint64_t illval)
{
	/*
	 * For the case of the error probe firing, stash away "illval"
	 * here, and special-case retrieving it in DIF_VARIABLE_ARG.
	 */
	state->dts_arg_error_illval = illval;
	dtrace_probe(dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fault, fltoffs);
}

/*
 * Atomicity and synchronization
 */
void
dtrace_membar_producer(void)
{
	__asm__ volatile("sfence");
}

void
dtrace_membar_consumer(void)
{
	__asm__ volatile("lfence");
}

/*
 * Interrupt manipulation
 * XXX dtrace_getipl() can be called from probe context.
 */
int
dtrace_getipl(void)
{
	/*
	 * XXX Drat, get_interrupt_level is MACH_KERNEL_PRIVATE
	 * in osfmk/kern/cpu_data.h
	 */
	/* return get_interrupt_level(); */
	return (ml_at_interrupt_context() ? 1 : 0);
}

/*
 * MP coordination
 */

extern void mp_broadcast(
	void (*action_func)(void *),
	void *arg);

typedef struct xcArg {
	processorid_t cpu;
	dtrace_xcall_t f;
	void *arg;
} xcArg_t;

static void
xcRemote(void *foo)
{
	xcArg_t *pArg = (xcArg_t *)foo;

	if (pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL) {
		(pArg->f)(pArg->arg);
	}
}
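
#if 0	/*
	 * Illustrative sketch only, never compiled: the producer/consumer
	 * pairing the fences above are meant for. A writer publishes a payload
	 * and then a ready flag; a reader checks the flag and then consumes
	 * the payload. "payload", "ready", and both functions are hypothetical
	 * names, not part of this file.
	 */
static uint64_t payload;
static volatile uint32_t ready;

static void
producer_side(uint64_t value)
{
	payload = value;
	dtrace_membar_producer();	/* order payload store before flag store (sfence) */
	ready = 1;
}

static uint64_t
consumer_side(void)
{
	while (ready == 0)
		continue;
	dtrace_membar_consumer();	/* order flag load before payload load (lfence) */
	return payload;
}
#endif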
/*
 * dtrace_xcall() is not called from probe context.
 */
void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
{
	xcArg_t xcArg;

	xcArg.cpu = cpu;
	xcArg.f = f;
	xcArg.arg = arg;

	mp_broadcast(xcRemote, (void *)&xcArg);
}

/*
 * Runtime and ABI
 */
extern greg_t
dtrace_getfp(void)
{
	return (greg_t)__builtin_frame_address(0);
}

uint64_t
dtrace_getreg(struct regs *savearea, uint_t reg)
{
	boolean_t is64Bit = proc_is64bit(current_proc());
	x86_saved_state_t *regs = (x86_saved_state_t *)savearea;

	if (is64Bit) {
		/* beyond register SS */
		if (reg > x86_SAVED_STATE64_COUNT - 1) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
		return ((uint64_t *)(&(regs->ss_64.gs)))[reg];
	} else {
		/* beyond register SS */
		if (reg > x86_SAVED_STATE32_COUNT - 1) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
		return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg];
	}
}

#define RETURN_OFFSET	4
#define RETURN_OFFSET64	8
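
#if 0	/*
	 * Illustrative sketch only, never compiled: dtrace_xcall() runs "f"
	 * either on one named CPU or, with DTRACE_CPUALL, on every CPU.
	 * mp_broadcast() invokes xcRemote() on each processor, and xcRemote()
	 * filters on cpu_id. "example_noop" and "example_sync_all_cpus" are
	 * hypothetical helpers, not part of this file.
	 */
static void
example_noop(void *ignored)
{
	(void)ignored;		/* nothing to do; the rendezvous itself is the point */
}

static void
example_sync_all_cpus(void)
{
	/* Returns after every CPU has executed the callback. */
	dtrace_xcall(DTRACE_CPUALL, example_noop, NULL);
}
#endif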
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc,
    user_addr_t sp)
{
#if 0
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack crawl */
	size_t s1, s2;
#endif
	int ret = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	ASSERT(pcstack == NULL || pcstack_limit > 0);

#if 0 /* XXX signal stack crawl */
	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	while (pc != 0) {
		ret++;
		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
				greg32_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		}
		else
#endif
		{
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

#if 0 /* XXX */
		/*
		 * This is totally bogus: if we faulted, we're going to clear
		 * the fault and break. This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
#endif
	}

	return (ret);
}

void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	int n;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	*pcstack++ = (uint64_t)proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		*pcstack++ = (uint64_t)pc;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		if (is64Bit)
			pc = dtrace_fuword64(sp);
		else
			pc = dtrace_fuword32(sp);
	}

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */
	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
	ASSERT(n >= 0);
	ASSERT(n <= pcstack_limit);

	pcstack += n;
	pcstack_limit -= n;

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}

int
dtrace_getustackdepth(void)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	int n = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (thread == NULL)
		return 0;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		return 0;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		n++;

		if (is64Bit)
			pc = dtrace_fuword64(sp);
		else
			pc = dtrace_fuword32(sp);
	}

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */
	n += dtrace_getustack_common(NULL, 0, pc, fp);

	return (n);
}
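
#if 0	/*
	 * Illustrative sketch only, never compiled: the frame layout assumed
	 * by the user-stack walkers above. At each step the caller's frame
	 * pointer sits at *sp and the return address at sp + RETURN_OFFSET
	 * (RETURN_OFFSET64 for 64-bit frames), so one unwind step reads both.
	 * "example_unwind_step" is a hypothetical helper, not part of this file.
	 */
static void
example_unwind_step(boolean_t is64Bit, user_addr_t *sp, user_addr_t *pc)
{
	if (is64Bit) {
		*pc = dtrace_fuword64(*sp + RETURN_OFFSET64);	/* saved return address */
		*sp = dtrace_fuword64(*sp);			/* caller's frame pointer */
	} else {
		*pc = dtrace_fuword32(*sp + RETURN_OFFSET);
		*sp = dtrace_fuword32(*sp);
	}
}
#endif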
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	savearea_t *regs;
	user_addr_t pc, sp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if 0
	uintptr_t oldcontext;
	size_t s1, s2;
#endif
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	regs = (savearea_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	*pcstack++ = (uint64_t)proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.ebp;
	}

#if 0 /* XXX signal stack crawl */
	oldcontext = lwp->lwp_oldcontext;

	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = 0;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		if (is64Bit)
			pc = dtrace_fuword64(sp);
		else
			pc = dtrace_fuword32(sp);
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = sp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
				greg32_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		}
		else
#endif
		{
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

#if 0 /* XXX */
		/*
		 * This is totally bogus: if we faulted, we're going to clear
		 * the fault and break. This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
#endif
	}

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
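
#if 0	/*
	 * Illustrative sketch only, never compiled: dtrace_getufpstack() fills
	 * two parallel arrays, each pc alongside the frame pointer it was
	 * found through, so a consumer can pair them index-for-index.
	 * "example_dump_ufp" and its printf are hypothetical, and real probe
	 * context must not printf.
	 */
static void
example_dump_ufp(uint64_t *pcstack, uint64_t *fpstack, int depth)
{
	int i;

	for (i = 0; i < depth && pcstack[i] != 0; i++)
		printf("frame %d: pc=0x%llx fp=0x%llx\n", i, pcstack[i], fpstack[i]);
}
#endif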
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
    uint32_t *intrpc)
{
	struct frame *fp = (struct frame *)dtrace_getfp();
	struct frame *nextfp, *minfp, *stacktop;
	int depth = 0;
	int last = 0;
	uintptr_t pc;
	uintptr_t caller = CPU->cpu_dtrace_caller;
	int on_intr;

	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
	else
		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE);

	minfp = fp;

	aframes++;

	if (intrpc != NULL && depth < pcstack_limit)
		pcstack[depth++] = (pc_t)intrpc;

	while (depth < pcstack_limit) {
		nextfp = *(struct frame **)fp;
		pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET);

		if (nextfp <= minfp || nextfp >= stacktop) {
			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

				minfp = (struct frame *)kstack_base;
				stacktop = (struct frame *)(kstack_base + KERNEL_STACK_SIZE);

				on_intr = 0;
				continue;
			}
			/*
			 * This is the last frame we can process; indicate
			 * that we should return after processing this frame.
			 */
			last = 1;
		}

		if (aframes > 0) {
			if (--aframes == 0 && caller != 0) {
				/*
				 * We've just run out of artificial frames,
				 * and we have a valid caller -- fill it in
				 * now.
				 */
				ASSERT(depth < pcstack_limit);
				pcstack[depth++] = (pc_t)caller;
				caller = 0;
			}
		} else {
			if (depth < pcstack_limit)
				pcstack[depth++] = (pc_t)pc;
		}

		if (last) {
			while (depth < pcstack_limit)
				pcstack[depth++] = 0;
			return;
		}

		fp = nextfp;
		minfp = fp;
	}
}

struct frame {
	struct frame *backchain;
	uintptr_t retaddr;
};

uint64_t
dtrace_getarg(int arg, int aframes)
{
	uint64_t val;
	struct frame *fp = (struct frame *)dtrace_getfp();
	uintptr_t *stack;
	uintptr_t pc;
	int i;

	for (i = 1; i <= aframes; i++) {
		fp = fp->backchain;
		pc = fp->retaddr;

		if (pc == (uintptr_t)dtrace_invop_callsite) {
			/*
			 * If we pass through the invalid op handler, we will
			 * use the pointer that it passed to the stack as the
			 * second argument to dtrace_invop() as the pointer to
			 * the frame we're hunting for.
			 */
			stack = (uintptr_t *)&fp[1];	/* Find marshalled arguments */
			fp = (struct frame *)stack[1];	/* Grab *second* argument */
			stack = (uintptr_t *)&fp[1];	/* Find marshalled arguments */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			val = (uint64_t)(stack[arg]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			return val;
		}
	}

	/*
	 * Arrive here when provider has called dtrace_probe directly.
	 */
	stack = (uintptr_t *)&fp[1];	/* Find marshalled arguments */
	stack++;			/* Advance past probeID */

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	val = *(((uint64_t *)stack) + arg);	/* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return (val);
}

/*
 * Load/Store Safety
 */
void
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
{
	/*
	 * "base" is the smallest toxic address in the range, "limit" is the first
	 * VALID address greater than "base".
	 */
	func(0x0, VM_MIN_KERNEL_ADDRESS);
	func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
}

extern boolean_t pmap_valid_page(ppnum_t pn);

boolean_t
dtxnu_is_RAM_page(ppnum_t pn)
{
	return pmap_valid_page(pn);
}
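
#if 0	/*
	 * Illustrative sketch only, never compiled: the i386 stack layout that
	 * dtrace_getarg() relies on. Just above each struct frame sit the
	 * marshalled arguments of the call, so &fp[1] points at the first
	 * argument slot; for a direct dtrace_probe() call that slot is the
	 * probe ID and the 64-bit arg0..arg4 follow it. The struct and field
	 * names here are descriptive only, not part of this file.
	 */
struct example_probe_call_stack {
	struct frame	frame;		/* backchain + retaddr */
	uintptr_t	probe_id;	/* first marshalled argument to dtrace_probe() */
	uint64_t	args[5];	/* arg0 .. arg4, each 64 bits wide */
};
#endif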