1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22/* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27/* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */ 28 29#ifdef KERNEL 30#ifndef _KERNEL 31#define _KERNEL /* Solaris vs. Darwin */ 32#endif 33#endif 34 35#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */ 36#include <kern/thread.h> 37#include <mach/thread_status.h> 38#include <mach/vm_param.h> 39#include <mach-o/loader.h> 40#include <mach-o/nlist.h> 41#include <libkern/kernel_mach_header.h> 42#include <libkern/OSAtomic.h> 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/errno.h> 47#include <sys/stat.h> 48#include <sys/ioctl.h> 49#include <sys/conf.h> 50#include <sys/fcntl.h> 51#include <miscfs/devfs/devfs.h> 52 53#include <sys/dtrace.h> 54#include <sys/dtrace_impl.h> 55#include <sys/fbt.h> 56 57#include <sys/dtrace_glue.h> 58 59#define DTRACE_INVOP_NOP_SKIP 1 60#define DTRACE_INVOP_MOVL_ESP_EBP 10 61#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2 62#define DTRACE_INVOP_MOV_RSP_RBP 11 63#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3 64#define DTRACE_INVOP_POP_RBP 12 65#define DTRACE_INVOP_POP_RBP_SKIP 1 66#define DTRACE_INVOP_LEAVE_SKIP 1 67 68#define FBT_PUSHL_EBP 0x55 69#define FBT_MOVL_ESP_EBP0_V0 0x8b 70#define FBT_MOVL_ESP_EBP1_V0 0xec 71#define FBT_MOVL_ESP_EBP0_V1 0x89 72#define FBT_MOVL_ESP_EBP1_V1 0xe5 73 74#define FBT_PUSH_RBP 0x55 75#define FBT_REX_RSP_RBP 0x48 76#define FBT_MOV_RSP_RBP0 0x89 77#define FBT_MOV_RSP_RBP1 0xe5 78#define FBT_POP_RBP 0x5d 79 80#define FBT_POPL_EBP 0x5d 81#define FBT_RET 0xc3 82#define FBT_RET_IMM16 0xc2 83#define FBT_LEAVE 0xc9 84#define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */ 85#define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */ 86#define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */ 87#define FBT_RET_LEN 1 88#define FBT_RET_IMM16_LEN 3 89#define FBT_JMP_SHORT_REL_LEN 2 90#define FBT_JMP_NEAR_REL_LEN 5 91#define FBT_JMP_FAR_ABS_LEN 5 92 93#define FBT_PATCHVAL 0xf0 94#define FBT_AFRAMES_ENTRY 7 95#define FBT_AFRAMES_RETURN 6 96 97#define FBT_ENTRY "entry" 98#define FBT_RETURN "return" 99#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask) 100 101extern dtrace_provider_id_t fbt_id; 102extern fbt_probe_t **fbt_probetab; 103extern int fbt_probetab_mask; 104 105extern int gIgnoreFBTBlacklist; /* From fbt_init */ 106 107kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int); 108 109/* 110 * Critical routines that must not be probed. PR_5221096, PR_5379018. 111 * The blacklist must be kept in alphabetic order for purposes of bsearch(). 112 */ 113 114static const char * critical_blacklist[] = 115{ 116 "bcopy_phys", 117 "console_cpu_alloc", 118 "console_cpu_free", 119 "cpu_IA32e_disable", 120 "cpu_IA32e_enable", 121 "cpu_NMI_interrupt", 122 "cpu_control", 123 "cpu_data_alloc", 124 "cpu_desc_init", 125 "cpu_desc_init64", 126 "cpu_desc_load", 127 "cpu_desc_load64", 128 "cpu_exit_wait", 129 "cpu_info", 130 "cpu_info_count", 131 "cpu_init", 132 "cpu_interrupt", 133 "cpu_machine_init", 134 "cpu_mode_init", 135 "cpu_processor_alloc", 136 "cpu_processor_free", 137 "cpu_signal_handler", 138 "cpu_sleep", 139 "cpu_start", 140 "cpu_subtype", 141 "cpu_thread_alloc", 142 "cpu_thread_halt", 143 "cpu_thread_init", 144 "cpu_threadtype", 145 "cpu_to_processor", 146 "cpu_topology_sort", 147 "cpu_topology_start_cpu", 148 "cpu_type", 149 "cpuid_cpu_display", 150 "cpuid_extfeatures", 151 "handle_pending_TLB_flushes", 152 "hw_compare_and_store", 153 "machine_idle_cstate", 154 "mca_cpu_alloc", 155 "mca_cpu_init", 156 "ml_nofault_copy", 157 "pmap_cpu_alloc", 158 "pmap_cpu_free", 159 "pmap_cpu_high_map_vaddr", 160 "pmap_cpu_high_shared_remap", 161 "pmap_cpu_init", 162 "register_cpu_setup_func", 163 "unregister_cpu_setup_func", 164 "vstart" 165}; 166#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0])) 167 168/* 169 * The transitive closure of entry points that can be reached from probe context. 170 * (Apart from routines whose names begin with dtrace_). 171 */ 172static const char * probe_ctx_closure[] = 173{ 174 "Debugger", 175 "IS_64BIT_PROCESS", 176 "OSCompareAndSwap", 177 "absolutetime_to_microtime", 178 "act_set_astbsd", 179 "ast_pending", 180 "clock_get_calendar_nanotime_nowait", 181 "copyin", 182 "copyin_user", 183 "copyinstr", 184 "copyout", 185 "copyoutstr", 186 "cpu_number", 187 "current_proc", 188 "current_processor", 189 "current_task", 190 "current_thread", 191 "debug_enter", 192 "find_user_regs", 193 "flush_tlb64", 194 "get_bsdtask_info", 195 "get_bsdthread_info", 196 "hw_atomic_and", 197 "kauth_cred_get", 198 "kauth_getgid", 199 "kauth_getuid", 200 "kernel_preempt_check", 201 "mach_absolute_time", 202 "max_valid_stack_address", 203 "ml_at_interrupt_context", 204 "ml_phys_write_byte_64", 205 "ml_phys_write_half_64", 206 "ml_phys_write_word_64", 207 "ml_set_interrupts_enabled", 208 "panic", 209 "pmap64_pde", 210 "pmap64_pdpt", 211 "pmap_find_phys", 212 "pmap_get_mapwindow", 213 "pmap_pde", 214 "pmap_pte", 215 "pmap_put_mapwindow", 216 "pmap_valid_page", 217 "prf", 218 "proc_is64bit", 219 "proc_selfname", 220 "psignal_lock", 221 "rtc_nanotime_load", 222 "rtc_nanotime_read", 223 "sdt_getargdesc", 224 "strlcpy", 225 "sync_iss_to_iks_unconditionally", 226 "systrace_stub", 227 "timer_grab" 228}; 229#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0])) 230 231 232static int _cmp(const void *a, const void *b) 233{ 234 return strncmp((const char *)a, *(const char **)b, strlen((const char *)a) + 1); 235} 236 237static const void * bsearch( 238 register const void *key, 239 const void *base0, 240 size_t nmemb, 241 register size_t size, 242 register int (*compar)(const void *, const void *)) { 243 244 register const char *base = base0; 245 register size_t lim; 246 register int cmp; 247 register const void *p; 248 249 for (lim = nmemb; lim != 0; lim >>= 1) { 250 p = base + (lim >> 1) * size; 251 cmp = (*compar)(key, p); 252 if (cmp == 0) 253 return p; 254 if (cmp > 0) { /* key > p: move right */ 255 base = (const char *)p + size; 256 lim--; 257 } /* else move left */ 258 } 259 return (NULL); 260} 261 262/* 263 * Module validation 264 */ 265static int 266is_module_valid(struct modctl* ctl) 267{ 268 ASSERT(!MOD_FBT_PROBES_PROVIDED(ctl)); 269 ASSERT(!MOD_FBT_INVALID(ctl)); 270 271 if (0 == ctl->mod_address || 0 == ctl->mod_size) { 272 return FALSE; 273 } 274 275 if (0 == ctl->mod_loaded) { 276 return FALSE; 277 } 278 279 if (strstr(ctl->mod_modname, "CHUD") != NULL) 280 return FALSE; 281 282 /* 283 * If the user sets this, trust they know what they are doing. 284 */ 285 if (gIgnoreFBTBlacklist) /* per boot-arg set in fbt_init() */ 286 return TRUE; 287 288 /* 289 * These drivers control low level functions that when traced 290 * cause problems often in the sleep/wake paths as well as 291 * critical debug and panic paths. 292 * If somebody really wants to drill in on one of these kexts, then 293 * they can override blacklisting using the boot-arg above. 294 */ 295 296 if (strstr(ctl->mod_modname, "AppleACPIEC") != NULL) 297 return FALSE; 298 299 if (strstr(ctl->mod_modname, "AppleACPIPlatform") != NULL) 300 return FALSE; 301 302 if (strstr(ctl->mod_modname, "AppleRTC") != NULL) 303 return FALSE; 304 305 if (strstr(ctl->mod_modname, "IOACPIFamily") != NULL) 306 return FALSE; 307 308 if (strstr(ctl->mod_modname, "AppleIntelCPUPowerManagement") != NULL) 309 return FALSE; 310 311 if (strstr(ctl->mod_modname, "AppleProfile") != NULL) 312 return FALSE; 313 314 if (strstr(ctl->mod_modname, "AppleIntelProfile") != NULL) 315 return FALSE; 316 317 if (strstr(ctl->mod_modname, "AppleEFI") != NULL) 318 return FALSE; 319 320 return TRUE; 321} 322 323/* 324 * FBT probe name validation 325 */ 326static int 327is_symbol_valid(const char* name) 328{ 329 /* 330 * If the user set this, trust they know what they are doing. 331 */ 332 if (gIgnoreFBTBlacklist) 333 return TRUE; 334 335 if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) { 336 /* 337 * Anything beginning with "dtrace_" may be called 338 * from probe context unless it explitly indicates 339 * that it won't be called from probe context by 340 * using the prefix "dtrace_safe_". 341 */ 342 return FALSE; 343 } 344 345 if (LIT_STRNSTART(name, "fasttrap_") || 346 LIT_STRNSTART(name, "fuword") || 347 LIT_STRNSTART(name, "suword") || 348 LIT_STRNEQL(name, "sprlock") || 349 LIT_STRNEQL(name, "sprunlock") || 350 LIT_STRNEQL(name, "uread") || 351 LIT_STRNEQL(name, "uwrite")) { 352 return FALSE; /* Fasttrap inner-workings. */ 353 } 354 355 if (LIT_STRNSTART(name, "dsmos_")) 356 return FALSE; /* Don't Steal Mac OS X! */ 357 358 if (LIT_STRNSTART(name, "_dtrace")) 359 return FALSE; /* Shims in dtrace.c */ 360 361 if (LIT_STRNSTART(name, "chud")) 362 return FALSE; /* Professional courtesy. */ 363 364 if (LIT_STRNSTART(name, "hibernate_")) 365 return FALSE; /* Let sleeping dogs lie. */ 366 367 if (LIT_STRNEQL(name, "_ZNK6OSData14getBytesNoCopyEv")) 368 return FALSE; /* Data::getBytesNoCopy, IOHibernateSystemWake path */ 369 370 if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */ 371 LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) { /* IOService::temperatureCriticalForZone */ 372 return FALSE; /* Per the fire code */ 373 } 374 375 /* 376 * Place no probes (illegal instructions) in the exception handling path! 377 */ 378 if (LIT_STRNEQL(name, "t_invop") || 379 LIT_STRNEQL(name, "enter_lohandler") || 380 LIT_STRNEQL(name, "lo_alltraps") || 381 LIT_STRNEQL(name, "kernel_trap") || 382 LIT_STRNEQL(name, "interrupt") || 383 LIT_STRNEQL(name, "i386_astintr")) { 384 return FALSE; 385 } 386 387 if (LIT_STRNEQL(name, "current_thread") || 388 LIT_STRNEQL(name, "ast_pending") || 389 LIT_STRNEQL(name, "fbt_perfCallback") || 390 LIT_STRNEQL(name, "machine_thread_get_kern_state") || 391 LIT_STRNEQL(name, "get_threadtask") || 392 LIT_STRNEQL(name, "ml_set_interrupts_enabled") || 393 LIT_STRNEQL(name, "dtrace_invop") || 394 LIT_STRNEQL(name, "fbt_invop") || 395 LIT_STRNEQL(name, "sdt_invop") || 396 LIT_STRNEQL(name, "max_valid_stack_address")) { 397 return FALSE; 398 } 399 400 /* 401 * Voodoo. 402 */ 403 if (LIT_STRNSTART(name, "machine_stack_") || 404 LIT_STRNSTART(name, "mapping_") || 405 LIT_STRNEQL(name, "tmrCvt") || 406 407 LIT_STRNSTART(name, "tsc_") || 408 409 LIT_STRNSTART(name, "pmCPU") || 410 LIT_STRNEQL(name, "pmKextRegister") || 411 LIT_STRNEQL(name, "pmMarkAllCPUsOff") || 412 LIT_STRNEQL(name, "pmSafeMode") || 413 LIT_STRNEQL(name, "pmTimerSave") || 414 LIT_STRNEQL(name, "pmTimerRestore") || 415 LIT_STRNEQL(name, "pmUnRegister") || 416 LIT_STRNSTART(name, "pms") || 417 LIT_STRNEQL(name, "power_management_init") || 418 LIT_STRNSTART(name, "usimple_") || 419 LIT_STRNSTART(name, "lck_spin_lock") || 420 LIT_STRNSTART(name, "lck_spin_unlock") || 421 422 LIT_STRNSTART(name, "rtc_") || 423 LIT_STRNSTART(name, "_rtc_") || 424 LIT_STRNSTART(name, "rtclock_") || 425 LIT_STRNSTART(name, "clock_") || 426 LIT_STRNSTART(name, "absolutetime_to_") || 427 LIT_STRNEQL(name, "setPop") || 428 LIT_STRNEQL(name, "nanoseconds_to_absolutetime") || 429 LIT_STRNEQL(name, "nanotime_to_absolutetime") || 430 431 LIT_STRNSTART(name, "etimer_") || 432 433 LIT_STRNSTART(name, "commpage_") || 434 LIT_STRNSTART(name, "pmap_") || 435 LIT_STRNSTART(name, "ml_") || 436 LIT_STRNSTART(name, "PE_") || 437 LIT_STRNEQL(name, "kprintf") || 438 LIT_STRNSTART(name, "lapic_") || 439 LIT_STRNSTART(name, "act_machine") || 440 LIT_STRNSTART(name, "acpi_") || 441 LIT_STRNSTART(name, "pal_")){ 442 return FALSE; 443 } 444 445 /* 446 * Avoid machine_ routines. PR_5346750. 447 */ 448 if (LIT_STRNSTART(name, "machine_")) 449 return FALSE; 450 451 if (LIT_STRNEQL(name, "handle_pending_TLB_flushes")) 452 return FALSE; 453 454 /* 455 * Place no probes on critical routines. PR_5221096 456 */ 457 if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL) 458 return FALSE; 459 460 /* 461 * Place no probes that could be hit in probe context. 462 */ 463 if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) { 464 return FALSE; 465 } 466 467 /* 468 * Place no probes that could be hit on the way to the debugger. 469 */ 470 if (LIT_STRNSTART(name, "kdp_") || 471 LIT_STRNSTART(name, "kdb_") || 472 LIT_STRNSTART(name, "kdbg_") || 473 LIT_STRNSTART(name, "kdebug_") || 474 LIT_STRNSTART(name, "kernel_debug") || 475 LIT_STRNSTART(name, "debug_") || 476 LIT_STRNEQL(name, "Debugger") || 477 LIT_STRNEQL(name, "Call_DebuggerC") || 478 LIT_STRNEQL(name, "lock_debugger") || 479 LIT_STRNEQL(name, "unlock_debugger") || 480 LIT_STRNEQL(name, "packA") || 481 LIT_STRNEQL(name, "unpackA") || 482 LIT_STRNEQL(name, "SysChoked")) { 483 return FALSE; 484 } 485 486 487 /* 488 * Place no probes that could be hit on the way to a panic. 489 */ 490 if (NULL != strstr(name, "panic_") || 491 LIT_STRNEQL(name, "panic") || 492 LIT_STRNEQL(name, "preemption_underflow_panic")) { 493 return FALSE; 494 } 495 496 return TRUE; 497} 498 499int 500fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval) 501{ 502 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; 503 504 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { 505 if ((uintptr_t)fbt->fbtp_patchpoint == addr) { 506 507 if (fbt->fbtp_roffset == 0) { 508 x86_saved_state64_t *regs = (x86_saved_state64_t *)state; 509 510 CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp) 511 /* 64-bit ABI, arguments passed in registers. */ 512 dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8); 513 CPU->cpu_dtrace_caller = 0; 514 } else { 515 516 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); 517 CPU->cpu_dtrace_caller = 0; 518 } 519 520 return (fbt->fbtp_rval); 521 } 522 } 523 524 return (0); 525} 526 527#define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0)) 528#define T_INVALID_OPCODE 6 529#define FBT_EXCEPTION_CODE T_INVALID_OPCODE 530#define T_PREEMPT 255 531 532kern_return_t 533fbt_perfCallback( 534 int trapno, 535 x86_saved_state_t *tagged_regs, 536 uintptr_t *lo_spp, 537 __unused int unused2) 538{ 539 kern_return_t retval = KERN_FAILURE; 540 x86_saved_state64_t *saved_state = saved_state64(tagged_regs); 541 542 if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) { 543 boolean_t oldlevel; 544 uint64_t rsp_probe, fp, delta = 0; 545 uintptr_t old_sp; 546 uint32_t *pDst; 547 int emul; 548 549 550 oldlevel = ml_set_interrupts_enabled(FALSE); 551 552 /* Calculate where the stack pointer was when the probe instruction "fired." */ 553 rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */ 554 555 __asm__ volatile( 556 "Ldtrace_invop_callsite_pre_label:\n" 557 ".data\n" 558 ".private_extern _dtrace_invop_callsite_pre\n" 559 "_dtrace_invop_callsite_pre:\n" 560 " .quad Ldtrace_invop_callsite_pre_label\n" 561 ".text\n" 562 ); 563 564 emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax ); 565 566 __asm__ volatile( 567 "Ldtrace_invop_callsite_post_label:\n" 568 ".data\n" 569 ".private_extern _dtrace_invop_callsite_post\n" 570 "_dtrace_invop_callsite_post:\n" 571 " .quad Ldtrace_invop_callsite_post_label\n" 572 ".text\n" 573 ); 574 575 switch (emul) { 576 case DTRACE_INVOP_NOP: 577 saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */ 578 retval = KERN_SUCCESS; 579 break; 580 581 case DTRACE_INVOP_MOV_RSP_RBP: 582 saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */ 583 saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */ 584 retval = KERN_SUCCESS; 585 break; 586 587 case DTRACE_INVOP_POP_RBP: 588 case DTRACE_INVOP_LEAVE: 589/* 590 * Emulate first micro-op of patched leave: mov %rbp,%rsp 591 * fp points just below the return address slot for target's ret 592 * and at the slot holding the frame pointer saved by the target's prologue. 593 */ 594 fp = saved_state->rbp; 595/* Emulate second micro-op of patched leave: patched pop %rbp 596 * savearea rbp is set for the frame of the caller to target 597 * The *live* %rsp will be adjusted below for pop increment(s) 598 */ 599 saved_state->rbp = *(uint64_t *)fp; 600/* Skip over the patched leave */ 601 saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP; 602/* 603 * Lift the stack to account for the emulated leave 604 * Account for words local in this frame 605 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.) 606 */ 607 delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */ 608/* Account for popping off the rbp (just accomplished by the emulation 609 * above...) 610 */ 611 delta += 2; 612 saved_state->isf.rsp += (delta << 2); 613/* Obtain the stack pointer recorded by the trampolines */ 614 old_sp = *lo_spp; 615/* Shift contents of stack */ 616 for (pDst = (uint32_t *)fp; 617 pDst > (((uint32_t *)old_sp)); 618 pDst--) 619 *pDst = pDst[-delta]; 620 621/* Track the stack lift in "saved_state". */ 622 saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2)); 623/* Adjust the stack pointer utilized by the trampolines */ 624 *lo_spp = old_sp + (delta << 2); 625 626 retval = KERN_SUCCESS; 627 break; 628 629 default: 630 retval = KERN_FAILURE; 631 break; 632 } 633 saved_state->isf.trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */ 634 635 ml_set_interrupts_enabled(oldlevel); 636 } 637 638 return retval; 639} 640 641/*ARGSUSED*/ 642static void 643__provide_probe_64(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart) 644{ 645 unsigned int j; 646 unsigned int doenable = 0; 647 dtrace_id_t thisid; 648 649 fbt_probe_t *newfbt, *retfbt, *entryfbt; 650 machine_inst_t *instr, *limit, theInstr, i1, i2, i3; 651 int size; 652 653 for (j = 0, instr = symbolStart, theInstr = 0; 654 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); 655 j++) { 656 theInstr = instr[0]; 657 if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16) 658 break; 659 660 if ((size = dtrace_instr_size(instr)) <= 0) 661 break; 662 663 instr += size; 664 } 665 666 if (theInstr != FBT_PUSH_RBP) 667 return; 668 669 i1 = instr[1]; 670 i2 = instr[2]; 671 i3 = instr[3]; 672 673 limit = (machine_inst_t *)instrHigh; 674 675 if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) { 676 instr += 1; /* Advance to the mov %rsp,%rbp */ 677 theInstr = i1; 678 } else { 679 return; 680 } 681#if 0 682 else { 683 /* 684 * Sometimes, the compiler will schedule an intervening instruction 685 * in the function prologue. Example: 686 * 687 * _mach_vm_read: 688 * 000006d8 pushl %ebp 689 * 000006d9 movl $0x00000004,%edx 690 * 000006de movl %esp,%ebp 691 * 692 * Try the next instruction, to see if it is a movl %esp,%ebp 693 */ 694 695 instr += 1; /* Advance past the pushl %ebp */ 696 if ((size = dtrace_instr_size(instr)) <= 0) 697 return; 698 699 instr += size; 700 701 if ((instr + 1) >= limit) 702 return; 703 704 i1 = instr[0]; 705 i2 = instr[1]; 706 707 if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) && 708 !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) 709 return; 710 711 /* instr already points at the movl %esp,%ebp */ 712 theInstr = i1; 713 } 714#endif 715 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY); 716 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); 717 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS ); 718 719 if (thisid != 0) { 720 /* 721 * The dtrace_probe previously existed, so we have to hook 722 * the newfbt entry onto the end of the existing fbt's chain. 723 * If we find an fbt entry that was previously patched to 724 * fire, (as indicated by the current patched value), then 725 * we want to enable this newfbt on the spot. 726 */ 727 entryfbt = dtrace_probe_arg (fbt_id, thisid); 728 ASSERT (entryfbt != NULL); 729 for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) { 730 if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval) 731 doenable++; 732 733 if (entryfbt->fbtp_next == NULL) { 734 entryfbt->fbtp_next = newfbt; 735 newfbt->fbtp_id = entryfbt->fbtp_id; 736 break; 737 } 738 } 739 } 740 else { 741 /* 742 * The dtrace_probe did not previously exist, so we 743 * create it and hook in the newfbt. Since the probe is 744 * new, we obviously do not need to enable it on the spot. 745 */ 746 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt); 747 doenable = 0; 748 } 749 750 newfbt->fbtp_patchpoint = instr; 751 newfbt->fbtp_ctl = ctl; 752 newfbt->fbtp_loadcnt = ctl->mod_loadcnt; 753 newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP; 754 newfbt->fbtp_savedval = theInstr; 755 newfbt->fbtp_patchval = FBT_PATCHVAL; 756 newfbt->fbtp_currentval = 0; 757 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; 758 fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt; 759 760 if (doenable) 761 fbt_enable(NULL, newfbt->fbtp_id, newfbt); 762 763 /* 764 * The fbt entry chain is in place, one entry point per symbol. 765 * The fbt return chain can have multiple return points per symbol. 766 * Here we find the end of the fbt return chain. 767 */ 768 769 doenable=0; 770 771 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN); 772 if (thisid != 0) { 773 /* The dtrace_probe previously existed, so we have to 774 * find the end of the existing fbt chain. If we find 775 * an fbt return that was previously patched to fire, 776 * (as indicated by the currrent patched value), then 777 * we want to enable any new fbts on the spot. 778 */ 779 retfbt = dtrace_probe_arg (fbt_id, thisid); 780 ASSERT(retfbt != NULL); 781 for (; retfbt != NULL; retfbt = retfbt->fbtp_next) { 782 if (retfbt->fbtp_currentval == retfbt->fbtp_patchval) 783 doenable++; 784 if(retfbt->fbtp_next == NULL) 785 break; 786 } 787 } 788 else { 789 doenable = 0; 790 retfbt = NULL; 791 } 792 793again: 794 if (instr >= limit) 795 return; 796 797 /* 798 * If this disassembly fails, then we've likely walked off into 799 * a jump table or some other unsuitable area. Bail out of the 800 * disassembly now. 801 */ 802 if ((size = dtrace_instr_size(instr)) <= 0) 803 return; 804 805 /* 806 * We (desperately) want to avoid erroneously instrumenting a 807 * jump table, especially given that our markers are pretty 808 * short: two bytes on x86, and just one byte on amd64. To 809 * determine if we're looking at a true instruction sequence 810 * or an inline jump table that happens to contain the same 811 * byte sequences, we resort to some heuristic sleeze: we 812 * treat this instruction as being contained within a pointer, 813 * and see if that pointer points to within the body of the 814 * function. If it does, we refuse to instrument it. 815 */ 816 for (j = 0; j < sizeof (uintptr_t); j++) { 817 uintptr_t check = (uintptr_t)instr - j; 818 uint8_t *ptr; 819 820 if (check < (uintptr_t)symbolStart) 821 break; 822 823 if (check + sizeof (uintptr_t) > (uintptr_t)limit) 824 continue; 825 826 ptr = *(uint8_t **)check; 827 828 if (ptr >= (uint8_t *)symbolStart && ptr < limit) { 829 instr += size; 830 goto again; 831 } 832 } 833 834 /* 835 * OK, it's an instruction. 836 */ 837 theInstr = instr[0]; 838 839 /* Walked onto the start of the next routine? If so, bail out of this function. */ 840 if (theInstr == FBT_PUSH_RBP) 841 return; 842 843 if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) { 844 instr += size; 845 goto again; 846 } 847 848 /* 849 * Found the pop %rbp; or leave. 850 */ 851 machine_inst_t *patch_instr = instr; 852 853 /* 854 * Scan forward for a "ret", or "jmp". 855 */ 856 instr += size; 857 if (instr >= limit) 858 return; 859 860 size = dtrace_instr_size(instr); 861 if (size <= 0) /* Failed instruction decode? */ 862 return; 863 864 theInstr = instr[0]; 865 866 if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) && 867 !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) && 868 !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) && 869 !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) && 870 !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS))) 871 return; 872 873 /* 874 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner! 875 */ 876 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); 877 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS ); 878 879 if (retfbt == NULL) { 880 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, 881 symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt); 882 } else { 883 retfbt->fbtp_next = newfbt; 884 newfbt->fbtp_id = retfbt->fbtp_id; 885 } 886 887 retfbt = newfbt; 888 newfbt->fbtp_patchpoint = patch_instr; 889 newfbt->fbtp_ctl = ctl; 890 newfbt->fbtp_loadcnt = ctl->mod_loadcnt; 891 892 if (*patch_instr == FBT_POP_RBP) { 893 newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP; 894 } else { 895 ASSERT(*patch_instr == FBT_LEAVE); 896 newfbt->fbtp_rval = DTRACE_INVOP_LEAVE; 897 } 898 newfbt->fbtp_roffset = 899 (uintptr_t)(patch_instr - (uint8_t *)symbolStart); 900 901 newfbt->fbtp_savedval = *patch_instr; 902 newfbt->fbtp_patchval = FBT_PATCHVAL; 903 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)]; 904 fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt; 905 906 if (doenable) 907 fbt_enable(NULL, newfbt->fbtp_id, newfbt); 908 909 instr += size; 910 goto again; 911} 912 913static void 914__kernel_syms_provide_module(void *arg, struct modctl *ctl) 915{ 916#pragma unused(arg) 917 kernel_mach_header_t *mh; 918 struct load_command *cmd; 919 kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; 920 struct symtab_command *orig_st = NULL; 921 kernel_nlist_t *sym = NULL; 922 char *strings; 923 uintptr_t instrLow, instrHigh; 924 char *modname; 925 unsigned int i; 926 927 mh = (kernel_mach_header_t *)(ctl->mod_address); 928 modname = ctl->mod_modname; 929 930 if (mh->magic != MH_MAGIC_KERNEL) 931 return; 932 933 cmd = (struct load_command *) &mh[1]; 934 for (i = 0; i < mh->ncmds; i++) { 935 if (cmd->cmd == LC_SEGMENT_KERNEL) { 936 kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd; 937 938 if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT)) 939 orig_ts = orig_sg; 940 else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT)) 941 orig_le = orig_sg; 942 else if (LIT_STRNEQL(orig_sg->segname, "")) 943 orig_ts = orig_sg; /* kexts have a single unnamed segment */ 944 } 945 else if (cmd->cmd == LC_SYMTAB) 946 orig_st = (struct symtab_command *) cmd; 947 948 cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); 949 } 950 951 if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) 952 return; 953 954 sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); 955 strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); 956 957 /* Find extent of the TEXT section */ 958 instrLow = (uintptr_t)orig_ts->vmaddr; 959 instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize); 960 961 for (i = 0; i < orig_st->nsyms; i++) { 962 uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT); 963 char *name = strings + sym[i].n_un.n_strx; 964 965 /* Check that the symbol is a global and that it has a name. */ 966 if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type)) 967 continue; 968 969 if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */ 970 continue; 971 972 /* Lop off omnipresent leading underscore. */ 973 if (*name == '_') 974 name += 1; 975 976 /* 977 * We're only blacklisting functions in the kernel for now. 978 */ 979 if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name)) 980 continue; 981 982 __provide_probe_64(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value); 983 } 984} 985 986static void 987__user_syms_provide_module(void *arg, struct modctl *ctl) 988{ 989#pragma unused(arg) 990 char *modname; 991 unsigned int i; 992 993 modname = ctl->mod_modname; 994 995 dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols; 996 if (module_symbols) { 997 for (i=0; i<module_symbols->dtmodsyms_count; i++) { 998 999 /* 1000 * symbol->dtsym_addr (the symbol address) passed in from 1001 * user space, is already slid for both kexts and kernel. 1002 */ 1003 dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i]; 1004 1005 char* name = symbol->dtsym_name; 1006 1007 /* Lop off omnipresent leading underscore. */ 1008 if (*name == '_') 1009 name += 1; 1010 1011 /* 1012 * We're only blacklisting functions in the kernel for now. 1013 */ 1014 if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name)) 1015 continue; 1016 1017 __provide_probe_64(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr); 1018 } 1019 } 1020} 1021 1022extern int dtrace_kernel_symbol_mode; 1023 1024/*ARGSUSED*/ 1025void 1026fbt_provide_module(void *arg, struct modctl *ctl) 1027{ 1028 ASSERT(ctl != NULL); 1029 ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER); 1030 lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); 1031 1032 if (MOD_FBT_DONE(ctl)) 1033 return; 1034 1035 if (!is_module_valid(ctl)) { 1036 ctl->mod_flags |= MODCTL_FBT_INVALID; 1037 return; 1038 } 1039 1040 if (MOD_HAS_KERNEL_SYMBOLS(ctl)) { 1041 __kernel_syms_provide_module(arg, ctl); 1042 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED; 1043 return; 1044 } 1045 1046 if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) { 1047 __user_syms_provide_module(arg, ctl); 1048 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED; 1049 if (MOD_FBT_PROVIDE_PRIVATE_PROBES(ctl)) 1050 ctl->mod_flags |= MODCTL_FBT_PRIVATE_PROBES_PROVIDED; 1051 return; 1052 } 1053} 1054