1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27/* 28 * #pragma ident "@(#)fasttrap_isa.c 1.27 08/04/09 SMI" 29 */ 30 31#ifdef KERNEL 32#ifndef _KERNEL 33#define _KERNEL /* Solaris vs. Darwin */ 34#endif 35#endif 36 37#include <sys/fasttrap_isa.h> 38#include <sys/fasttrap_impl.h> 39#include <sys/dtrace.h> 40#include <sys/dtrace_impl.h> 41extern dtrace_id_t dtrace_probeid_error; 42 43#include "fasttrap_regset.h" 44 45#include <sys/dtrace_ptss.h> 46#include <kern/debug.h> 47 48#include <machine/pal_routines.h> 49 50/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ 51#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ 52 53/* 54 * Lossless User-Land Tracing on x86 55 * --------------------------------- 56 * 57 * The execution of most instructions is not dependent on the address; for 58 * these instructions it is sufficient to copy them into the user process's 59 * address space and execute them. 
To effectively single-step an instruction
 * in user-land, we copy out the following sequence of instructions to scratch
 * space in the user thread's ulwp_t structure.
 *
 * We then set the program counter (%eip or %rip) to point to this scratch
 * space. Once execution resumes, the original instruction is executed and
 * then control flow is redirected to what was originally the subsequent
 * instruction. If the kernel attempts to deliver a signal while single-
 * stepping, the signal is deferred and the program counter is moved into the
 * second sequence of instructions. The second sequence ends in a trap into
 * the kernel where the deferred signal is then properly handled and delivered.
 *
 * For instructions whose execution is position dependent, we perform simple
 * emulation. These instructions are limited to control transfer
 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
 * of %rip-relative addressing that means that almost any instruction can be
 * position dependent. For all the details on how we emulate generic
 * instructions including %rip-relative instructions, see the code in
 * fasttrap_pid_probe() below where we handle instructions of type
 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
/*
 * Field accessors and constructors for the ModRM, SIB and REX instruction
 * bytes.  Each accessor extracts one bit-field from the byte; FASTTRAP_MODRM()
 * and FASTTRAP_REX() assemble a byte from its fields.
 */
#define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x3)
#define	FASTTRAP_MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)
#define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x7)
#define	FASTTRAP_MODRM(mod, reg, rm)	(((mod) << 6) | ((reg) << 3) | (rm))

#define	FASTTRAP_SIB_SCALE(sib)		(((sib) >> 6) & 0x3)
#define	FASTTRAP_SIB_INDEX(sib)		(((sib) >> 3) & 0x7)
#define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x7)

#define	FASTTRAP_REX_W(rex)		(((rex) >> 3) & 1)
#define	FASTTRAP_REX_R(rex)		(((rex) >> 2) & 1)
#define	FASTTRAP_REX_X(rex)		(((rex) >> 1) & 1)
#define	FASTTRAP_REX_B(rex)		((rex) & 1)
#define	FASTTRAP_REX(w, r, x, b)	\
	(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))

/*
 * Single-byte op-codes.
 */
#define	FASTTRAP_PUSHL_EBP	0x55

#define	FASTTRAP_JO		0x70
#define	FASTTRAP_JNO		0x71
#define	FASTTRAP_JB		0x72
#define	FASTTRAP_JAE		0x73
#define	FASTTRAP_JE		0x74
#define	FASTTRAP_JNE		0x75
#define	FASTTRAP_JBE		0x76
#define	FASTTRAP_JA		0x77
#define	FASTTRAP_JS		0x78
#define	FASTTRAP_JNS		0x79
#define	FASTTRAP_JP		0x7a
#define	FASTTRAP_JNP		0x7b
#define	FASTTRAP_JL		0x7c
#define	FASTTRAP_JGE		0x7d
#define	FASTTRAP_JLE		0x7e
#define	FASTTRAP_JG		0x7f

#define	FASTTRAP_NOP		0x90

#define	FASTTRAP_MOV_EAX	0xb8
#define	FASTTRAP_MOV_ECX	0xb9

#define	FASTTRAP_RET16		0xc2
#define	FASTTRAP_RET		0xc3

#define	FASTTRAP_LOOPNZ		0xe0
#define	FASTTRAP_LOOPZ		0xe1
#define	FASTTRAP_LOOP		0xe2
#define	FASTTRAP_JCXZ		0xe3

#define	FASTTRAP_CALL		0xe8
#define	FASTTRAP_JMP32		0xe9
#define	FASTTRAP_JMP8		0xeb

#define	FASTTRAP_INT3		0xcc
#define	FASTTRAP_INT		0xcd
#define	T_DTRACE_RET		0x7f

#define	FASTTRAP_2_BYTE_OP	0x0f
#define	FASTTRAP_GROUP5_OP	0xff

/*
 * Two-byte op-codes (second byte only).
 *
 * The low nibble of each 0x0f 0x8N conditional jump matches the low nibble
 * of the corresponding single-byte 0x7N jump above.
 */
#define	FASTTRAP_0F_JO		0x80
#define	FASTTRAP_0F_JNO		0x81
#define	FASTTRAP_0F_JB		0x82
#define	FASTTRAP_0F_JAE		0x83
#define	FASTTRAP_0F_JE		0x84
#define	FASTTRAP_0F_JNE		0x85
#define	FASTTRAP_0F_JBE		0x86
#define	FASTTRAP_0F_JA		0x87
#define	FASTTRAP_0F_JS		0x88
#define	FASTTRAP_0F_JNS		0x89
#define	FASTTRAP_0F_JP		0x8a
#define	FASTTRAP_0F_JNP		0x8b
#define	FASTTRAP_0F_JL		0x8c
#define	FASTTRAP_0F_JGE		0x8d
#define	FASTTRAP_0F_JLE		0x8e
#define	FASTTRAP_0F_JG		0x8f

/*
 * EFLAGS condition-code bits tested when emulating conditional branches.
 */
#define	FASTTRAP_EFLAGS_OF	0x800
#define	FASTTRAP_EFLAGS_DF	0x400
#define	FASTTRAP_EFLAGS_SF	0x080
#define	FASTTRAP_EFLAGS_ZF	0x040
#define	FASTTRAP_EFLAGS_AF	0x010
#define	FASTTRAP_EFLAGS_PF	0x004
#define	FASTTRAP_EFLAGS_CF	0x001

/*
 * Instruction prefixes.
 */
#define	FASTTRAP_PREFIX_OPERAND	0x66
#define	FASTTRAP_PREFIX_ADDRESS	0x67
#define	FASTTRAP_PREFIX_CS	0x2E
#define	FASTTRAP_PREFIX_DS	0x3E
#define	FASTTRAP_PREFIX_ES	0x26
#define	FASTTRAP_PREFIX_FS	0x64
#define	FASTTRAP_PREFIX_GS	0x65
#define	FASTTRAP_PREFIX_SS	0x36
#define	FASTTRAP_PREFIX_LOCK	0xF0
#define	FASTTRAP_PREFIX_REP	0xF3
#define	FASTTRAP_PREFIX_REPNE	0xF2

/* Marks "no base/index register" in a decoded tracepoint. */
#define	FASTTRAP_NOREG	0xff

/*
 * Map between instruction register encodings and the kernel constants which
 * correspond to indices into struct regs.
 */

/*
 * APPLE NOTE: We are cheating here. The regmap is used to decode which register
 * a given instruction is trying to reference. OS X does not have extended registers
 * for 32 bit apps, but the *order* is the same. So for 32 bit state, we will return:
 *
 *	REG_RAX -> EAX
 *	REG_RCX -> ECX
 *	...
 *	REG_RDI -> EDI
 *
 * The fasttrap_getreg function knows how to make the correct transformation.
 */
204 */ 205#if __sol64 || defined(__APPLE__) 206static const uint8_t regmap[16] = { 207 REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, 208 REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, 209}; 210#else 211static const uint8_t regmap[8] = { 212 EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI 213}; 214#endif 215 216static user_addr_t fasttrap_getreg(x86_saved_state_t *, uint_t); 217 218static uint64_t 219fasttrap_anarg(x86_saved_state_t *regs, int function_entry, int argno) 220{ 221 uint64_t value; 222 int shift = function_entry ? 1 : 0; 223 224 x86_saved_state64_t *regs64; 225 x86_saved_state32_t *regs32; 226 unsigned int p_model; 227 228 if (is_saved_state64(regs)) { 229 regs64 = saved_state64(regs); 230 regs32 = NULL; 231 p_model = DATAMODEL_LP64; 232 } else { 233 regs64 = NULL; 234 regs32 = saved_state32(regs); 235 p_model = DATAMODEL_ILP32; 236 } 237 238 if (p_model == DATAMODEL_LP64) { 239 user_addr_t stack; 240 241 /* 242 * In 64-bit mode, the first six arguments are stored in 243 * registers. 
244 */ 245 if (argno < 6) 246 return ((®s64->rdi)[argno]); 247 248 stack = regs64->isf.rsp + sizeof(uint64_t) * (argno - 6 + shift); 249 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 250 value = dtrace_fuword64(stack); 251 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); 252 } else { 253 uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp); 254 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 255 value = dtrace_fuword32((user_addr_t)(unsigned long)&stack[argno + shift]); 256 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); 257 } 258 259 return (value); 260} 261 262/*ARGSUSED*/ 263int 264fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, 265 fasttrap_probe_type_t type) 266{ 267#pragma unused(type) 268 uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; 269 size_t len = FASTTRAP_MAX_INSTR_SIZE; 270 size_t first = MIN(len, PAGE_SIZE - (pc & PAGE_MASK)); 271 uint_t start = 0; 272 size_t size; 273 int rmindex; 274 uint8_t seg, rex = 0; 275 unsigned int p_model = (p->p_flag & P_LP64) ? DATAMODEL_LP64 : DATAMODEL_ILP32; 276 277 /* 278 * Read the instruction at the given address out of the process's 279 * address space. We don't have to worry about a debugger 280 * changing this instruction before we overwrite it with our trap 281 * instruction since P_PR_LOCK is set. Since instructions can span 282 * pages, we potentially read the instruction in two parts. If the 283 * second part fails, we just zero out that part of the instruction. 284 */ 285 /* 286 * APPLE NOTE: Of course, we do not have a P_PR_LOCK, so this is racey... 287 */ 288 if (uread(p, &instr[0], first, pc) != 0) 289 return (-1); 290 if (len > first && 291 uread(p, &instr[first], len - first, pc + first) != 0) { 292 bzero(&instr[first], len - first); 293 len = first; 294 } 295 296 /* 297 * If the disassembly fails, then we have a malformed instruction. 
298 */ 299 if ((size = dtrace_instr_size_isa(instr, p_model, &rmindex)) <= 0) 300 return (-1); 301 302 /* 303 * Make sure the disassembler isn't completely broken. 304 */ 305 ASSERT(-1 <= rmindex && rmindex < (int)size); 306 307 /* 308 * If the computed size is greater than the number of bytes read, 309 * then it was a malformed instruction possibly because it fell on a 310 * page boundary and the subsequent page was missing or because of 311 * some malicious user. 312 */ 313 if (size > len) 314 return (-1); 315 316 tp->ftt_size = (uint8_t)size; 317 tp->ftt_segment = FASTTRAP_SEG_NONE; 318 319 /* 320 * Find the start of the instruction's opcode by processing any 321 * legacy prefixes. 322 */ 323 for (;;) { 324 seg = 0; 325 switch (instr[start]) { 326 case FASTTRAP_PREFIX_SS: 327 seg++; 328 /*FALLTHRU*/ 329 case FASTTRAP_PREFIX_GS: 330 seg++; 331 /*FALLTHRU*/ 332 case FASTTRAP_PREFIX_FS: 333 seg++; 334 /*FALLTHRU*/ 335 case FASTTRAP_PREFIX_ES: 336 seg++; 337 /*FALLTHRU*/ 338 case FASTTRAP_PREFIX_DS: 339 seg++; 340 /*FALLTHRU*/ 341 case FASTTRAP_PREFIX_CS: 342 seg++; 343 /*FALLTHRU*/ 344 case FASTTRAP_PREFIX_OPERAND: 345 case FASTTRAP_PREFIX_ADDRESS: 346 case FASTTRAP_PREFIX_LOCK: 347 case FASTTRAP_PREFIX_REP: 348 case FASTTRAP_PREFIX_REPNE: 349 if (seg != 0) { 350 /* 351 * It's illegal for an instruction to specify 352 * two segment prefixes -- give up on this 353 * illegal instruction. 354 */ 355 if (tp->ftt_segment != FASTTRAP_SEG_NONE) 356 return (-1); 357 358 tp->ftt_segment = seg; 359 } 360 start++; 361 continue; 362 } 363 break; 364 } 365 366#if __sol64 || defined(__APPLE__) 367 /* 368 * Identify the REX prefix on 64-bit processes. 369 */ 370 if (p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) 371 rex = instr[start++]; 372#endif 373 374 /* 375 * Now that we're pretty sure that the instruction is okay, copy the 376 * valid part to the tracepoint. 
377 */ 378 bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); 379 380 tp->ftt_type = FASTTRAP_T_COMMON; 381 if (instr[start] == FASTTRAP_2_BYTE_OP) { 382 switch (instr[start + 1]) { 383 case FASTTRAP_0F_JO: 384 case FASTTRAP_0F_JNO: 385 case FASTTRAP_0F_JB: 386 case FASTTRAP_0F_JAE: 387 case FASTTRAP_0F_JE: 388 case FASTTRAP_0F_JNE: 389 case FASTTRAP_0F_JBE: 390 case FASTTRAP_0F_JA: 391 case FASTTRAP_0F_JS: 392 case FASTTRAP_0F_JNS: 393 case FASTTRAP_0F_JP: 394 case FASTTRAP_0F_JNP: 395 case FASTTRAP_0F_JL: 396 case FASTTRAP_0F_JGE: 397 case FASTTRAP_0F_JLE: 398 case FASTTRAP_0F_JG: 399 tp->ftt_type = FASTTRAP_T_JCC; 400 tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; 401 tp->ftt_dest = pc + tp->ftt_size + 402 /* LINTED - alignment */ 403 *(int32_t *)&instr[start + 2]; 404 break; 405 } 406 } else if (instr[start] == FASTTRAP_GROUP5_OP) { 407 uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); 408 uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); 409 uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); 410 411 if (reg == 2 || reg == 4) { 412 uint_t i, sz; 413 414 if (reg == 2) 415 tp->ftt_type = FASTTRAP_T_CALL; 416 else 417 tp->ftt_type = FASTTRAP_T_JMP; 418 419 if (mod == 3) 420 tp->ftt_code = 2; 421 else 422 tp->ftt_code = 1; 423 424 ASSERT(p_model == DATAMODEL_LP64 || rex == 0); 425 426 /* 427 * See AMD x86-64 Architecture Programmer's Manual 428 * Volume 3, Section 1.2.7, Table 1-12, and 429 * Appendix A.3.1, Table A-15. 430 */ 431 if (mod != 3 && rm == 4) { 432 uint8_t sib = instr[start + 2]; 433 uint_t index = FASTTRAP_SIB_INDEX(sib); 434 uint_t base = FASTTRAP_SIB_BASE(sib); 435 436 tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); 437 438 tp->ftt_index = (index == 4) ? 439 FASTTRAP_NOREG : 440 regmap[index | (FASTTRAP_REX_X(rex) << 3)]; 441 tp->ftt_base = (mod == 0 && base == 5) ? 442 FASTTRAP_NOREG : 443 regmap[base | (FASTTRAP_REX_B(rex) << 3)]; 444 445 i = 3; 446 sz = mod == 1 ? 
1 : 4; 447 } else { 448 /* 449 * In 64-bit mode, mod == 0 and r/m == 5 450 * denotes %rip-relative addressing; in 32-bit 451 * mode, the base register isn't used. In both 452 * modes, there is a 32-bit operand. 453 */ 454 if (mod == 0 && rm == 5) { 455#if __sol64 || defined(__APPLE__) 456 if (p_model == DATAMODEL_LP64) 457 tp->ftt_base = REG_RIP; 458 else 459#endif 460 tp->ftt_base = FASTTRAP_NOREG; 461 sz = 4; 462 } else { 463 uint8_t base = rm | 464 (FASTTRAP_REX_B(rex) << 3); 465 466 tp->ftt_base = regmap[base]; 467 sz = mod == 1 ? 1 : mod == 2 ? 4 : 0; 468 } 469 tp->ftt_index = FASTTRAP_NOREG; 470 i = 2; 471 } 472 473 if (sz == 1) { 474 tp->ftt_dest = *(int8_t *)&instr[start + i]; 475 } else if (sz == 4) { 476 /* LINTED - alignment */ 477 tp->ftt_dest = *(int32_t *)&instr[start + i]; 478 } else { 479 tp->ftt_dest = 0; 480 } 481 } 482 } else { 483 switch (instr[start]) { 484 case FASTTRAP_RET: 485 tp->ftt_type = FASTTRAP_T_RET; 486 break; 487 488 case FASTTRAP_RET16: 489 tp->ftt_type = FASTTRAP_T_RET16; 490 /* LINTED - alignment */ 491 tp->ftt_dest = *(uint16_t *)&instr[start + 1]; 492 break; 493 494 case FASTTRAP_JO: 495 case FASTTRAP_JNO: 496 case FASTTRAP_JB: 497 case FASTTRAP_JAE: 498 case FASTTRAP_JE: 499 case FASTTRAP_JNE: 500 case FASTTRAP_JBE: 501 case FASTTRAP_JA: 502 case FASTTRAP_JS: 503 case FASTTRAP_JNS: 504 case FASTTRAP_JP: 505 case FASTTRAP_JNP: 506 case FASTTRAP_JL: 507 case FASTTRAP_JGE: 508 case FASTTRAP_JLE: 509 case FASTTRAP_JG: 510 tp->ftt_type = FASTTRAP_T_JCC; 511 tp->ftt_code = instr[start]; 512 tp->ftt_dest = pc + tp->ftt_size + 513 (int8_t)instr[start + 1]; 514 break; 515 516 case FASTTRAP_LOOPNZ: 517 case FASTTRAP_LOOPZ: 518 case FASTTRAP_LOOP: 519 tp->ftt_type = FASTTRAP_T_LOOP; 520 tp->ftt_code = instr[start]; 521 tp->ftt_dest = pc + tp->ftt_size + 522 (int8_t)instr[start + 1]; 523 break; 524 525 case FASTTRAP_JCXZ: 526 tp->ftt_type = FASTTRAP_T_JCXZ; 527 tp->ftt_dest = pc + tp->ftt_size + 528 (int8_t)instr[start + 1]; 529 break; 
530 531 case FASTTRAP_CALL: 532 tp->ftt_type = FASTTRAP_T_CALL; 533 tp->ftt_dest = pc + tp->ftt_size + 534 /* LINTED - alignment */ 535 *(int32_t *)&instr[start + 1]; 536 tp->ftt_code = 0; 537 break; 538 539 case FASTTRAP_JMP32: 540 tp->ftt_type = FASTTRAP_T_JMP; 541 tp->ftt_dest = pc + tp->ftt_size + 542 /* LINTED - alignment */ 543 *(int32_t *)&instr[start + 1]; 544 break; 545 case FASTTRAP_JMP8: 546 tp->ftt_type = FASTTRAP_T_JMP; 547 tp->ftt_dest = pc + tp->ftt_size + 548 (int8_t)instr[start + 1]; 549 break; 550 551 case FASTTRAP_PUSHL_EBP: 552 if (start == 0) 553 tp->ftt_type = FASTTRAP_T_PUSHL_EBP; 554 break; 555 556 case FASTTRAP_NOP: 557#if __sol64 || defined(__APPLE__) 558 ASSERT(p_model == DATAMODEL_LP64 || rex == 0); 559 560 /* 561 * On sol64 we have to be careful not to confuse a nop 562 * (actually xchgl %eax, %eax) with an instruction using 563 * the same opcode, but that does something different 564 * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). 565 */ 566 if (FASTTRAP_REX_B(rex) == 0) 567#endif 568 tp->ftt_type = FASTTRAP_T_NOP; 569 break; 570 571 case FASTTRAP_INT3: 572 /* 573 * The pid provider shares the int3 trap with debugger 574 * breakpoints so we can't instrument them. 575 */ 576 ASSERT(instr[start] == FASTTRAP_INSTR); 577 return (-1); 578 579 case FASTTRAP_INT: 580 /* 581 * Interrupts seem like they could be traced with 582 * no negative implications, but it's possible that 583 * a thread could be redirected by the trap handling 584 * code which would eventually return to the 585 * instruction after the interrupt. If the interrupt 586 * were in our scratch space, the subsequent 587 * instruction might be overwritten before we return. 588 * Accordingly we refuse to instrument any interrupt. 
589 */ 590 return (-1); 591 } 592 } 593 594#if __sol64 || defined(__APPLE__) 595 if (p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { 596 /* 597 * If the process is 64-bit and the instruction type is still 598 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an 599 * execute it -- we need to watch for %rip-relative 600 * addressing mode. See the portion of fasttrap_pid_probe() 601 * below where we handle tracepoints with type 602 * FASTTRAP_T_COMMON for how we emulate instructions that 603 * employ %rip-relative addressing. 604 */ 605 if (rmindex != -1) { 606 uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); 607 uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); 608 uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); 609 610 ASSERT(rmindex > (int)start); 611 612 if (mod == 0 && rm == 5) { 613 /* 614 * We need to be sure to avoid other 615 * registers used by this instruction. While 616 * the reg field may determine the op code 617 * rather than denoting a register, assuming 618 * that it denotes a register is always safe. 619 * We leave the REX field intact and use 620 * whatever value's there for simplicity. 621 */ 622 if (reg != 0) { 623 tp->ftt_ripmode = FASTTRAP_RIP_1 | 624 (FASTTRAP_RIP_X * 625 FASTTRAP_REX_B(rex)); 626 rm = 0; 627 } else { 628 tp->ftt_ripmode = FASTTRAP_RIP_2 | 629 (FASTTRAP_RIP_X * 630 FASTTRAP_REX_B(rex)); 631 rm = 1; 632 } 633 634 tp->ftt_modrm = tp->ftt_instr[rmindex]; 635 tp->ftt_instr[rmindex] = 636 FASTTRAP_MODRM(2, reg, rm); 637 } 638 } 639 } 640#endif 641 642 return (0); 643} 644 645int 646fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) 647{ 648 fasttrap_instr_t instr = FASTTRAP_INSTR; 649 650 if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) 651 return (-1); 652 653 return (0); 654} 655 656int 657fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) 658{ 659 uint8_t instr; 660 661 /* 662 * Distinguish between read or write failures and a changed 663 * instruction. 
664 */ 665 if (uread(p, &instr, 1, tp->ftt_pc) != 0) 666 return (0); 667 if (instr != FASTTRAP_INSTR) 668 return (0); 669 if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) 670 return (-1); 671 672 return (0); 673} 674 675static void 676fasttrap_return_common(x86_saved_state_t *regs, user_addr_t pc, pid_t pid, 677 user_addr_t new_pc) 678{ 679 x86_saved_state64_t *regs64; 680 x86_saved_state32_t *regs32; 681 unsigned int p_model; 682 683 dtrace_icookie_t cookie; 684 685 if (is_saved_state64(regs)) { 686 regs64 = saved_state64(regs); 687 regs32 = NULL; 688 p_model = DATAMODEL_LP64; 689 } else { 690 regs64 = NULL; 691 regs32 = saved_state32(regs); 692 p_model = DATAMODEL_ILP32; 693 } 694 695 fasttrap_tracepoint_t *tp; 696 fasttrap_bucket_t *bucket; 697 fasttrap_id_t *id; 698 lck_mtx_t *pid_mtx; 699 700 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; 701 lck_mtx_lock(pid_mtx); 702 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; 703 704 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { 705 if (pid == tp->ftt_pid && pc == tp->ftt_pc && 706 tp->ftt_proc->ftpc_acount != 0) 707 break; 708 } 709 710 /* 711 * Don't sweat it if we can't find the tracepoint again; unlike 712 * when we're in fasttrap_pid_probe(), finding the tracepoint here 713 * is not essential to the correct execution of the process. 714 */ 715 if (tp == NULL) { 716 lck_mtx_unlock(pid_mtx); 717 return; 718 } 719 720 for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { 721 /* 722 * If there's a branch that could act as a return site, we 723 * need to trace it, and check here if the program counter is 724 * external to the function. 
725 */ 726 if (tp->ftt_type != FASTTRAP_T_RET && 727 tp->ftt_type != FASTTRAP_T_RET16 && 728 new_pc - id->fti_probe->ftp_faddr < 729 id->fti_probe->ftp_fsize) 730 continue; 731 732 /* 733 * Provide a hint to the stack trace functions to add the 734 * following pc to the top of the stack since it's missing 735 * on a return probe yet highly desirable for consistency. 736 */ 737 cookie = dtrace_interrupt_disable(); 738 cpu_core[CPU->cpu_id].cpuc_missing_tos = pc; 739 if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) { 740 dtrace_probe(dtrace_probeid_error, 0 /* state */, id->fti_probe->ftp_id, 741 1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV); 742 } else if (p_model == DATAMODEL_LP64) { 743 dtrace_probe(id->fti_probe->ftp_id, 744 pc - id->fti_probe->ftp_faddr, 745 regs64->rax, regs64->rdx, 0, 0); 746 } else { 747 dtrace_probe(id->fti_probe->ftp_id, 748 pc - id->fti_probe->ftp_faddr, 749 regs32->eax, regs32->edx, 0, 0); 750 } 751 /* remove the hint */ 752 cpu_core[CPU->cpu_id].cpuc_missing_tos = 0; 753 dtrace_interrupt_enable(cookie); 754 } 755 756 lck_mtx_unlock(pid_mtx); 757} 758 759static void 760fasttrap_sigsegv(proc_t *p, uthread_t t, user_addr_t addr) 761{ 762 proc_lock(p); 763 764 /* Set fault address and mark signal */ 765 t->uu_code = addr; 766 t->uu_siglist |= sigmask(SIGSEGV); 767 768 /* 769 * XXX These two line may be redundant; if not, then we need 770 * XXX to potentially set the data address in the machine 771 * XXX specific thread state structure to indicate the address. 
772 */ 773 t->uu_exception = KERN_INVALID_ADDRESS; /* SIGSEGV */ 774 t->uu_subcode = 0; /* XXX pad */ 775 776 proc_unlock(p); 777 778 /* raise signal */ 779 signal_setast(t->uu_context.vc_thread); 780} 781 782static void 783fasttrap_usdt_args64(fasttrap_probe_t *probe, x86_saved_state64_t *regs64, int argc, 784 uint64_t *argv) 785{ 786 int i, x, cap = MIN(argc, probe->ftp_nargs); 787 user_addr_t stack = (user_addr_t)regs64->isf.rsp; 788 789 for (i = 0; i < cap; i++) { 790 x = probe->ftp_argmap[i]; 791 792 if (x < 6) { 793 /* FIXME! This may be broken, needs testing */ 794 argv[i] = (®s64->rdi)[x]; 795 } else { 796 fasttrap_fuword64_noerr(stack + (x * sizeof(uint64_t)), &argv[i]); 797 } 798 } 799 800 for (; i < argc; i++) { 801 argv[i] = 0; 802 } 803} 804 805static void 806fasttrap_usdt_args32(fasttrap_probe_t *probe, x86_saved_state32_t *regs32, int argc, 807 uint32_t *argv) 808{ 809 int i, x, cap = MIN(argc, probe->ftp_nargs); 810 uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp); 811 812 for (i = 0; i < cap; i++) { 813 x = probe->ftp_argmap[i]; 814 815 fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[x], &argv[i]); 816 } 817 818 for (; i < argc; i++) { 819 argv[i] = 0; 820 } 821} 822 823/* 824 * FIXME! 
825 */ 826static int 827fasttrap_do_seg(fasttrap_tracepoint_t *tp, x86_saved_state_t *rp, user_addr_t *addr) // 64 bit 828{ 829#pragma unused(tp, rp, addr) 830 printf("fasttrap_do_seg() called while unimplemented.\n"); 831#if 0 832 proc_t *p = curproc; 833 user_desc_t *desc; 834 uint16_t sel, ndx, type; 835 uintptr_t limit; 836 837 switch (tp->ftt_segment) { 838 case FASTTRAP_SEG_CS: 839 sel = rp->r_cs; 840 break; 841 case FASTTRAP_SEG_DS: 842 sel = rp->r_ds; 843 break; 844 case FASTTRAP_SEG_ES: 845 sel = rp->r_es; 846 break; 847 case FASTTRAP_SEG_FS: 848 sel = rp->r_fs; 849 break; 850 case FASTTRAP_SEG_GS: 851 sel = rp->r_gs; 852 break; 853 case FASTTRAP_SEG_SS: 854 sel = rp->r_ss; 855 break; 856 } 857 858 /* 859 * Make sure the given segment register specifies a user priority 860 * selector rather than a kernel selector. 861 */ 862 if (!SELISUPL(sel)) 863 return (-1); 864 865 ndx = SELTOIDX(sel); 866 867 /* 868 * Check the bounds and grab the descriptor out of the specified 869 * descriptor table. 870 */ 871 if (SELISLDT(sel)) { 872 if (ndx > p->p_ldtlimit) 873 return (-1); 874 875 desc = p->p_ldt + ndx; 876 877 } else { 878 if (ndx >= NGDT) 879 return (-1); 880 881 desc = cpu_get_gdt() + ndx; 882 } 883 884 /* 885 * The descriptor must have user privilege level and it must be 886 * present in memory. 887 */ 888 if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1) 889 return (-1); 890 891 type = desc->usd_type; 892 893 /* 894 * If the S bit in the type field is not set, this descriptor can 895 * only be used in system context. 896 */ 897 if ((type & 0x10) != 0x10) 898 return (-1); 899 900 limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1); 901 902 if (tp->ftt_segment == FASTTRAP_SEG_CS) { 903 /* 904 * The code/data bit and readable bit must both be set. 905 */ 906 if ((type & 0xa) != 0xa) 907 return (-1); 908 909 if (*addr > limit) 910 return (-1); 911 } else { 912 /* 913 * The code/data bit must be clear. 
914 */ 915 if ((type & 0x8) != 0) 916 return (-1); 917 918 /* 919 * If the expand-down bit is clear, we just check the limit as 920 * it would naturally be applied. Otherwise, we need to check 921 * that the address is the range [limit + 1 .. 0xffff] or 922 * [limit + 1 ... 0xffffffff] depending on if the default 923 * operand size bit is set. 924 */ 925 if ((type & 0x4) == 0) { 926 if (*addr > limit) 927 return (-1); 928 } else if (desc->usd_def32) { 929 if (*addr < limit + 1 || 0xffff < *addr) 930 return (-1); 931 } else { 932 if (*addr < limit + 1 || 0xffffffff < *addr) 933 return (-1); 934 } 935 } 936 937 *addr += USEGD_GETBASE(desc); 938#endif /* 0 */ 939 return (0); 940} 941 942/* 943 * Due to variances between Solaris and xnu, I have split this into a 32 bit and 64 bit 944 * code path. It still takes an x86_saved_state_t* argument, because it must sometimes 945 * call other methods that require a x86_saved_state_t. 946 * 947 * NOTE!!!! 948 * 949 * Any changes made to this method must be echo'd in fasttrap_pid_probe64! 950 * 951 */ 952static int 953fasttrap_pid_probe32(x86_saved_state_t *regs) 954{ 955 ASSERT(is_saved_state32(regs)); 956 957 x86_saved_state32_t *regs32 = saved_state32(regs); 958 user_addr_t pc = regs32->eip - 1; 959 proc_t *p = current_proc(); 960 user_addr_t new_pc = 0; 961 fasttrap_bucket_t *bucket; 962 lck_mtx_t *pid_mtx; 963 fasttrap_tracepoint_t *tp, tp_local; 964 pid_t pid; 965 dtrace_icookie_t cookie; 966 uint_t is_enabled = 0; 967 968 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 969 970 /* 971 * It's possible that a user (in a veritable orgy of bad planning) 972 * could redirect this thread's flow of control before it reached the 973 * return probe fasttrap. In this case we need to kill the process 974 * since it's in a unrecoverable state. 
975 */ 976 if (uthread->t_dtrace_step) { 977 ASSERT(uthread->t_dtrace_on); 978 fasttrap_sigtrap(p, uthread, pc); 979 return (0); 980 } 981 982 /* 983 * Clear all user tracing flags. 984 */ 985 uthread->t_dtrace_ft = 0; 986 uthread->t_dtrace_pc = 0; 987 uthread->t_dtrace_npc = 0; 988 uthread->t_dtrace_scrpc = 0; 989 uthread->t_dtrace_astpc = 0; 990 991 /* 992 * Treat a child created by a call to vfork(2) as if it were its 993 * parent. We know that there's only one thread of control in such a 994 * process: this one. 995 */ 996 /* 997 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal" 998 * FIXME: How do we assert this? 999 */ 1000 while (p->p_lflag & P_LINVFORK) 1001 p = p->p_pptr; 1002 1003 pid = p->p_pid; 1004 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; 1005 lck_mtx_lock(pid_mtx); 1006 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; 1007 1008 /* 1009 * Lookup the tracepoint that the process just hit. 1010 */ 1011 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { 1012 if (pid == tp->ftt_pid && pc == tp->ftt_pc && 1013 tp->ftt_proc->ftpc_acount != 0) 1014 break; 1015 } 1016 1017 /* 1018 * If we couldn't find a matching tracepoint, either a tracepoint has 1019 * been inserted without using the pid<pid> ioctl interface (see 1020 * fasttrap_ioctl), or somehow we have mislaid this tracepoint. 1021 */ 1022 if (tp == NULL) { 1023 lck_mtx_unlock(pid_mtx); 1024 return (-1); 1025 } 1026 1027 /* 1028 * Set the program counter to the address of the traced instruction 1029 * so that it looks right in ustack() output. 1030 */ 1031 regs32->eip = pc; 1032 1033 if (tp->ftt_ids != NULL) { 1034 fasttrap_id_t *id; 1035 1036 uint32_t s0, s1, s2, s3, s4, s5; 1037 uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp); 1038 1039 /* 1040 * In 32-bit mode, all arguments are passed on the 1041 * stack. 
If this is a function entry probe, we need 1042 * to skip the first entry on the stack as it 1043 * represents the return address rather than a 1044 * parameter to the function. 1045 */ 1046 fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[0], &s0); 1047 fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[1], &s1); 1048 fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[2], &s2); 1049 fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[3], &s3); 1050 fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[4], &s4); 1051 fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[5], &s5); 1052 1053 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { 1054 fasttrap_probe_t *probe = id->fti_probe; 1055 1056 if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) { 1057 dtrace_probe(dtrace_probeid_error, 0 /* state */, probe->ftp_id, 1058 1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV); 1059 } else if (id->fti_ptype == DTFTP_ENTRY) { 1060 /* 1061 * We note that this was an entry 1062 * probe to help ustack() find the 1063 * first caller. 1064 */ 1065 cookie = dtrace_interrupt_disable(); 1066 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); 1067 dtrace_probe(probe->ftp_id, s1, s2, 1068 s3, s4, s5); 1069 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); 1070 dtrace_interrupt_enable(cookie); 1071 } else if (id->fti_ptype == DTFTP_IS_ENABLED) { 1072 /* 1073 * Note that in this case, we don't 1074 * call dtrace_probe() since it's only 1075 * an artificial probe meant to change 1076 * the flow of control so that it 1077 * encounters the true probe. 1078 */ 1079 is_enabled = 1; 1080 } else if (probe->ftp_argmap == NULL) { 1081 dtrace_probe(probe->ftp_id, s0, s1, 1082 s2, s3, s4); 1083 } else { 1084 uint32_t t[5]; 1085 1086 fasttrap_usdt_args32(probe, regs32, 1087 sizeof (t) / sizeof (t[0]), t); 1088 1089 dtrace_probe(probe->ftp_id, t[0], t[1], 1090 t[2], t[3], t[4]); 1091 } 1092 1093 /* APPLE NOTE: Oneshot probes get one and only one chance... 
*/ 1094 if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) { 1095 fasttrap_tracepoint_remove(p, tp); 1096 } 1097 } 1098 } 1099 1100 /* 1101 * We're about to do a bunch of work so we cache a local copy of 1102 * the tracepoint to emulate the instruction, and then find the 1103 * tracepoint again later if we need to light up any return probes. 1104 */ 1105 tp_local = *tp; 1106 lck_mtx_unlock(pid_mtx); 1107 tp = &tp_local; 1108 1109 /* 1110 * Set the program counter to appear as though the traced instruction 1111 * had completely executed. This ensures that fasttrap_getreg() will 1112 * report the expected value for REG_RIP. 1113 */ 1114 regs32->eip = pc + tp->ftt_size; 1115 1116 /* 1117 * If there's an is-enabled probe connected to this tracepoint it 1118 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' 1119 * instruction that was placed there by DTrace when the binary was 1120 * linked. As this probe is, in fact, enabled, we need to stuff 1 1121 * into %eax or %rax. Accordingly, we can bypass all the instruction 1122 * emulation logic since we know the inevitable result. It's possible 1123 * that a user could construct a scenario where the 'is-enabled' 1124 * probe was on some other instruction, but that would be a rather 1125 * exotic way to shoot oneself in the foot. 1126 */ 1127 if (is_enabled) { 1128 regs32->eax = 1; 1129 new_pc = regs32->eip; 1130 goto done; 1131 } 1132 1133 /* 1134 * We emulate certain types of instructions to ensure correctness 1135 * (in the case of position dependent instructions) or optimize 1136 * common cases. The rest we have the thread execute back in user- 1137 * land. 
1138 */ 1139 switch (tp->ftt_type) { 1140 case FASTTRAP_T_RET: 1141 case FASTTRAP_T_RET16: 1142 { 1143 user_addr_t dst; 1144 user_addr_t addr; 1145 int ret; 1146 1147 /* 1148 * We have to emulate _every_ facet of the behavior of a ret 1149 * instruction including what happens if the load from %esp 1150 * fails; in that case, we send a SIGSEGV. 1151 */ 1152 uint32_t dst32; 1153 ret = fasttrap_fuword32((user_addr_t)regs32->uesp, &dst32); 1154 dst = dst32; 1155 addr = regs32->uesp + sizeof (uint32_t); 1156 1157 if (ret == -1) { 1158 fasttrap_sigsegv(p, uthread, (user_addr_t)regs32->uesp); 1159 new_pc = pc; 1160 break; 1161 } 1162 1163 if (tp->ftt_type == FASTTRAP_T_RET16) 1164 addr += tp->ftt_dest; 1165 1166 regs32->uesp = addr; 1167 new_pc = dst; 1168 break; 1169 } 1170 1171 case FASTTRAP_T_JCC: 1172 { 1173 uint_t taken; 1174 1175 switch (tp->ftt_code) { 1176 case FASTTRAP_JO: 1177 taken = (regs32->efl & FASTTRAP_EFLAGS_OF) != 0; 1178 break; 1179 case FASTTRAP_JNO: 1180 taken = (regs32->efl & FASTTRAP_EFLAGS_OF) == 0; 1181 break; 1182 case FASTTRAP_JB: 1183 taken = (regs32->efl & FASTTRAP_EFLAGS_CF) != 0; 1184 break; 1185 case FASTTRAP_JAE: 1186 taken = (regs32->efl & FASTTRAP_EFLAGS_CF) == 0; 1187 break; 1188 case FASTTRAP_JE: 1189 taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) != 0; 1190 break; 1191 case FASTTRAP_JNE: 1192 taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) == 0; 1193 break; 1194 case FASTTRAP_JBE: 1195 taken = (regs32->efl & FASTTRAP_EFLAGS_CF) != 0 || 1196 (regs32->efl & FASTTRAP_EFLAGS_ZF) != 0; 1197 break; 1198 case FASTTRAP_JA: 1199 taken = (regs32->efl & FASTTRAP_EFLAGS_CF) == 0 && 1200 (regs32->efl & FASTTRAP_EFLAGS_ZF) == 0; 1201 break; 1202 case FASTTRAP_JS: 1203 taken = (regs32->efl & FASTTRAP_EFLAGS_SF) != 0; 1204 break; 1205 case FASTTRAP_JNS: 1206 taken = (regs32->efl & FASTTRAP_EFLAGS_SF) == 0; 1207 break; 1208 case FASTTRAP_JP: 1209 taken = (regs32->efl & FASTTRAP_EFLAGS_PF) != 0; 1210 break; 1211 case FASTTRAP_JNP: 1212 taken = (regs32->efl & 
FASTTRAP_EFLAGS_PF) == 0; 1213 break; 1214 case FASTTRAP_JL: 1215 taken = ((regs32->efl & FASTTRAP_EFLAGS_SF) == 0) != 1216 ((regs32->efl & FASTTRAP_EFLAGS_OF) == 0); 1217 break; 1218 case FASTTRAP_JGE: 1219 taken = ((regs32->efl & FASTTRAP_EFLAGS_SF) == 0) == 1220 ((regs32->efl & FASTTRAP_EFLAGS_OF) == 0); 1221 break; 1222 case FASTTRAP_JLE: 1223 taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) != 0 || 1224 ((regs32->efl & FASTTRAP_EFLAGS_SF) == 0) != 1225 ((regs32->efl & FASTTRAP_EFLAGS_OF) == 0); 1226 break; 1227 case FASTTRAP_JG: 1228 taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) == 0 && 1229 ((regs32->efl & FASTTRAP_EFLAGS_SF) == 0) == 1230 ((regs32->efl & FASTTRAP_EFLAGS_OF) == 0); 1231 break; 1232 default: 1233 taken = FALSE; 1234 } 1235 1236 if (taken) 1237 new_pc = tp->ftt_dest; 1238 else 1239 new_pc = pc + tp->ftt_size; 1240 break; 1241 } 1242 1243 case FASTTRAP_T_LOOP: 1244 { 1245 uint_t taken; 1246 greg_t cx = regs32->ecx--; 1247 1248 switch (tp->ftt_code) { 1249 case FASTTRAP_LOOPNZ: 1250 taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) == 0 && 1251 cx != 0; 1252 break; 1253 case FASTTRAP_LOOPZ: 1254 taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) != 0 && 1255 cx != 0; 1256 break; 1257 case FASTTRAP_LOOP: 1258 taken = (cx != 0); 1259 break; 1260 default: 1261 taken = FALSE; 1262 } 1263 1264 if (taken) 1265 new_pc = tp->ftt_dest; 1266 else 1267 new_pc = pc + tp->ftt_size; 1268 break; 1269 } 1270 1271 case FASTTRAP_T_JCXZ: 1272 { 1273 greg_t cx = regs32->ecx; 1274 1275 if (cx == 0) 1276 new_pc = tp->ftt_dest; 1277 else 1278 new_pc = pc + tp->ftt_size; 1279 break; 1280 } 1281 1282 case FASTTRAP_T_PUSHL_EBP: 1283 { 1284 user_addr_t addr = regs32->uesp - sizeof (uint32_t); 1285 int ret = fasttrap_suword32(addr, (uint32_t)regs32->ebp); 1286 1287 if (ret == -1) { 1288 fasttrap_sigsegv(p, uthread, addr); 1289 new_pc = pc; 1290 break; 1291 } 1292 1293 regs32->uesp = addr; 1294 new_pc = pc + tp->ftt_size; 1295 break; 1296 } 1297 1298 case FASTTRAP_T_NOP: 1299 new_pc = pc + 
tp->ftt_size; 1300 break; 1301 1302 case FASTTRAP_T_JMP: 1303 case FASTTRAP_T_CALL: 1304 if (tp->ftt_code == 0) { 1305 new_pc = tp->ftt_dest; 1306 } else { 1307 user_addr_t /* value ,*/ addr = tp->ftt_dest; 1308 1309 if (tp->ftt_base != FASTTRAP_NOREG) 1310 addr += fasttrap_getreg(regs, tp->ftt_base); 1311 if (tp->ftt_index != FASTTRAP_NOREG) 1312 addr += fasttrap_getreg(regs, tp->ftt_index) << 1313 tp->ftt_scale; 1314 1315 if (tp->ftt_code == 1) { 1316 /* 1317 * If there's a segment prefix for this 1318 * instruction, we'll need to check permissions 1319 * and bounds on the given selector, and adjust 1320 * the address accordingly. 1321 */ 1322 if (tp->ftt_segment != FASTTRAP_SEG_NONE && 1323 fasttrap_do_seg(tp, regs, &addr) != 0) { 1324 fasttrap_sigsegv(p, uthread, addr); 1325 new_pc = pc; 1326 break; 1327 } 1328 1329 uint32_t value32; 1330 addr = (user_addr_t)(uint32_t)addr; 1331 if (fasttrap_fuword32(addr, &value32) == -1) { 1332 fasttrap_sigsegv(p, uthread, addr); 1333 new_pc = pc; 1334 break; 1335 } 1336 new_pc = value32; 1337 } else { 1338 new_pc = addr; 1339 } 1340 } 1341 1342 /* 1343 * If this is a call instruction, we need to push the return 1344 * address onto the stack. If this fails, we send the process 1345 * a SIGSEGV and reset the pc to emulate what would happen if 1346 * this instruction weren't traced. 
1347 */ 1348 if (tp->ftt_type == FASTTRAP_T_CALL) { 1349 user_addr_t addr = regs32->uesp - sizeof (uint32_t); 1350 int ret = fasttrap_suword32(addr, (uint32_t)(pc + tp->ftt_size)); 1351 1352 if (ret == -1) { 1353 fasttrap_sigsegv(p, uthread, addr); 1354 new_pc = pc; 1355 break; 1356 } 1357 1358 regs32->uesp = addr; 1359 } 1360 break; 1361 1362 case FASTTRAP_T_COMMON: 1363 { 1364 user_addr_t addr; 1365 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; 1366 uint_t i = 0; 1367 1368 /* 1369 * Generic Instruction Tracing 1370 * --------------------------- 1371 * 1372 * This is the layout of the scratch space in the user-land 1373 * thread structure for our generated instructions. 1374 * 1375 * 32-bit mode bytes 1376 * ------------------------ ----- 1377 * a: <original instruction> <= 15 1378 * jmp <pc + tp->ftt_size> 5 1379 * b: <original instrction> <= 15 1380 * int T_DTRACE_RET 2 1381 * ----- 1382 * <= 37 1383 * 1384 * 64-bit mode bytes 1385 * ------------------------ ----- 1386 * a: <original instruction> <= 15 1387 * jmp 0(%rip) 6 1388 * <pc + tp->ftt_size> 8 1389 * b: <original instruction> <= 15 1390 * int T_DTRACE_RET 2 1391 * ----- 1392 * <= 46 1393 * 1394 * The %pc is set to a, and curthread->t_dtrace_astpc is set 1395 * to b. If we encounter a signal on the way out of the 1396 * kernel, trap() will set %pc to curthread->t_dtrace_astpc 1397 * so that we execute the original instruction and re-enter 1398 * the kernel rather than redirecting to the next instruction. 1399 * 1400 * If there are return probes (so we know that we're going to 1401 * need to reenter the kernel after executing the original 1402 * instruction), the scratch space will just contain the 1403 * original instruction followed by an interrupt -- the same 1404 * data as at b. 
1405 */ 1406 1407 addr = uthread->t_dtrace_scratch->addr; 1408 1409 if (addr == 0LL) { 1410 fasttrap_sigtrap(p, uthread, pc); // Should be killing target proc 1411 new_pc = pc; 1412 break; 1413 } 1414 1415 ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE); 1416 1417 uthread->t_dtrace_scrpc = addr; 1418 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); 1419 i += tp->ftt_size; 1420 1421 /* 1422 * Set up the jmp to the next instruction; note that 1423 * the size of the traced instruction cancels out. 1424 */ 1425 scratch[i++] = FASTTRAP_JMP32; 1426 /* LINTED - alignment */ 1427 *(uint32_t *)&scratch[i] = pc - addr - 5; 1428 i += sizeof (uint32_t); 1429 1430 uthread->t_dtrace_astpc = addr + i; 1431 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); 1432 i += tp->ftt_size; 1433 scratch[i++] = FASTTRAP_INT; 1434 scratch[i++] = T_DTRACE_RET; 1435 1436 ASSERT(i <= sizeof (scratch)); 1437 1438 if (fasttrap_copyout(scratch, addr, i)) { 1439 fasttrap_sigtrap(p, uthread, pc); 1440 new_pc = pc; 1441 break; 1442 } 1443 1444 if (tp->ftt_retids != NULL) { 1445 uthread->t_dtrace_step = 1; 1446 uthread->t_dtrace_ret = 1; 1447 new_pc = uthread->t_dtrace_astpc; 1448 } else { 1449 new_pc = uthread->t_dtrace_scrpc; 1450 } 1451 1452 uthread->t_dtrace_pc = pc; 1453 uthread->t_dtrace_npc = pc + tp->ftt_size; 1454 uthread->t_dtrace_on = 1; 1455 break; 1456 } 1457 1458 default: 1459 panic("fasttrap: mishandled an instruction"); 1460 } 1461 1462done: 1463 /* 1464 * APPLE NOTE: 1465 * 1466 * We're setting this earlier than Solaris does, to get a "correct" 1467 * ustack() output. In the Sun code, a() -> b() -> c() -> d() is 1468 * reported at: d, b, a. The new way gives c, b, a, which is closer 1469 * to correct, as the return instruction has already exectued. 
1470 */ 1471 regs32->eip = new_pc; 1472 1473 /* 1474 * If there were no return probes when we first found the tracepoint, 1475 * we should feel no obligation to honor any return probes that were 1476 * subsequently enabled -- they'll just have to wait until the next 1477 * time around. 1478 */ 1479 if (tp->ftt_retids != NULL) { 1480 /* 1481 * We need to wait until the results of the instruction are 1482 * apparent before invoking any return probes. If this 1483 * instruction was emulated we can just call 1484 * fasttrap_return_common(); if it needs to be executed, we 1485 * need to wait until the user thread returns to the kernel. 1486 */ 1487 if (tp->ftt_type != FASTTRAP_T_COMMON) { 1488 fasttrap_return_common(regs, pc, pid, new_pc); 1489 } else { 1490 ASSERT(uthread->t_dtrace_ret != 0); 1491 ASSERT(uthread->t_dtrace_pc == pc); 1492 ASSERT(uthread->t_dtrace_scrpc != 0); 1493 ASSERT(new_pc == uthread->t_dtrace_astpc); 1494 } 1495 } 1496 1497 return (0); 1498} 1499 1500/* 1501 * Due to variances between Solaris and xnu, I have split this into a 32 bit and 64 bit 1502 * code path. It still takes an x86_saved_state_t* argument, because it must sometimes 1503 * call other methods that require a x86_saved_state_t. 1504 * 1505 * NOTE!!!! 1506 * 1507 * Any changes made to this method must be echo'd in fasttrap_pid_probe32! 
1508 * 1509 */ 1510static int 1511fasttrap_pid_probe64(x86_saved_state_t *regs) 1512{ 1513 ASSERT(is_saved_state64(regs)); 1514 1515 x86_saved_state64_t *regs64 = saved_state64(regs); 1516 user_addr_t pc = regs64->isf.rip - 1; 1517 proc_t *p = current_proc(); 1518 user_addr_t new_pc = 0; 1519 fasttrap_bucket_t *bucket; 1520 lck_mtx_t *pid_mtx; 1521 fasttrap_tracepoint_t *tp, tp_local; 1522 pid_t pid; 1523 dtrace_icookie_t cookie; 1524 uint_t is_enabled = 0; 1525 1526 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 1527 1528 /* 1529 * It's possible that a user (in a veritable orgy of bad planning) 1530 * could redirect this thread's flow of control before it reached the 1531 * return probe fasttrap. In this case we need to kill the process 1532 * since it's in a unrecoverable state. 1533 */ 1534 if (uthread->t_dtrace_step) { 1535 ASSERT(uthread->t_dtrace_on); 1536 fasttrap_sigtrap(p, uthread, pc); 1537 return (0); 1538 } 1539 1540 /* 1541 * Clear all user tracing flags. 1542 */ 1543 uthread->t_dtrace_ft = 0; 1544 uthread->t_dtrace_pc = 0; 1545 uthread->t_dtrace_npc = 0; 1546 uthread->t_dtrace_scrpc = 0; 1547 uthread->t_dtrace_astpc = 0; 1548 uthread->t_dtrace_regv = 0; 1549 1550 /* 1551 * Treat a child created by a call to vfork(2) as if it were its 1552 * parent. We know that there's only one thread of control in such a 1553 * process: this one. 1554 */ 1555 /* 1556 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal" 1557 * FIXME: How do we assert this? 1558 */ 1559 while (p->p_lflag & P_LINVFORK) 1560 p = p->p_pptr; 1561 1562 pid = p->p_pid; 1563 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; 1564 lck_mtx_lock(pid_mtx); 1565 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; 1566 1567 /* 1568 * Lookup the tracepoint that the process just hit. 
1569 */ 1570 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { 1571 if (pid == tp->ftt_pid && pc == tp->ftt_pc && 1572 tp->ftt_proc->ftpc_acount != 0) 1573 break; 1574 } 1575 1576 /* 1577 * If we couldn't find a matching tracepoint, either a tracepoint has 1578 * been inserted without using the pid<pid> ioctl interface (see 1579 * fasttrap_ioctl), or somehow we have mislaid this tracepoint. 1580 */ 1581 if (tp == NULL) { 1582 lck_mtx_unlock(pid_mtx); 1583 return (-1); 1584 } 1585 1586 /* 1587 * Set the program counter to the address of the traced instruction 1588 * so that it looks right in ustack() output. 1589 */ 1590 regs64->isf.rip = pc; 1591 1592 if (tp->ftt_ids != NULL) { 1593 fasttrap_id_t *id; 1594 1595 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { 1596 fasttrap_probe_t *probe = id->fti_probe; 1597 1598 if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) { 1599 dtrace_probe(dtrace_probeid_error, 0 /* state */, probe->ftp_id, 1600 1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV); 1601 } else if (id->fti_ptype == DTFTP_ENTRY) { 1602 /* 1603 * We note that this was an entry 1604 * probe to help ustack() find the 1605 * first caller. 1606 */ 1607 cookie = dtrace_interrupt_disable(); 1608 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); 1609 dtrace_probe(probe->ftp_id, regs64->rdi, 1610 regs64->rsi, regs64->rdx, regs64->rcx, 1611 regs64->r8); 1612 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); 1613 dtrace_interrupt_enable(cookie); 1614 } else if (id->fti_ptype == DTFTP_IS_ENABLED) { 1615 /* 1616 * Note that in this case, we don't 1617 * call dtrace_probe() since it's only 1618 * an artificial probe meant to change 1619 * the flow of control so that it 1620 * encounters the true probe. 
1621 */ 1622 is_enabled = 1; 1623 } else if (probe->ftp_argmap == NULL) { 1624 dtrace_probe(probe->ftp_id, regs64->rdi, 1625 regs64->rsi, regs64->rdx, regs64->rcx, 1626 regs64->r8); 1627 } else { 1628 uint64_t t[5]; 1629 1630 fasttrap_usdt_args64(probe, regs64, 1631 sizeof (t) / sizeof (t[0]), t); 1632 1633 dtrace_probe(probe->ftp_id, t[0], t[1], 1634 t[2], t[3], t[4]); 1635 } 1636 1637 /* APPLE NOTE: Oneshot probes get one and only one chance... */ 1638 if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) { 1639 fasttrap_tracepoint_remove(p, tp); 1640 } 1641 } 1642 } 1643 1644 /* 1645 * We're about to do a bunch of work so we cache a local copy of 1646 * the tracepoint to emulate the instruction, and then find the 1647 * tracepoint again later if we need to light up any return probes. 1648 */ 1649 tp_local = *tp; 1650 lck_mtx_unlock(pid_mtx); 1651 tp = &tp_local; 1652 1653 /* 1654 * Set the program counter to appear as though the traced instruction 1655 * had completely executed. This ensures that fasttrap_getreg() will 1656 * report the expected value for REG_RIP. 1657 */ 1658 regs64->isf.rip = pc + tp->ftt_size; 1659 1660 /* 1661 * If there's an is-enabled probe connected to this tracepoint it 1662 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' 1663 * instruction that was placed there by DTrace when the binary was 1664 * linked. As this probe is, in fact, enabled, we need to stuff 1 1665 * into %eax or %rax. Accordingly, we can bypass all the instruction 1666 * emulation logic since we know the inevitable result. It's possible 1667 * that a user could construct a scenario where the 'is-enabled' 1668 * probe was on some other instruction, but that would be a rather 1669 * exotic way to shoot oneself in the foot. 
1670 */ 1671 if (is_enabled) { 1672 regs64->rax = 1; 1673 new_pc = regs64->isf.rip; 1674 goto done; 1675 } 1676 1677 /* 1678 * We emulate certain types of instructions to ensure correctness 1679 * (in the case of position dependent instructions) or optimize 1680 * common cases. The rest we have the thread execute back in user- 1681 * land. 1682 */ 1683 switch (tp->ftt_type) { 1684 case FASTTRAP_T_RET: 1685 case FASTTRAP_T_RET16: 1686 { 1687 user_addr_t dst; 1688 user_addr_t addr; 1689 int ret; 1690 1691 /* 1692 * We have to emulate _every_ facet of the behavior of a ret 1693 * instruction including what happens if the load from %esp 1694 * fails; in that case, we send a SIGSEGV. 1695 */ 1696 ret = fasttrap_fuword64((user_addr_t)regs64->isf.rsp, &dst); 1697 addr = regs64->isf.rsp + sizeof (uint64_t); 1698 1699 if (ret == -1) { 1700 fasttrap_sigsegv(p, uthread, (user_addr_t)regs64->isf.rsp); 1701 new_pc = pc; 1702 break; 1703 } 1704 1705 if (tp->ftt_type == FASTTRAP_T_RET16) 1706 addr += tp->ftt_dest; 1707 1708 regs64->isf.rsp = addr; 1709 new_pc = dst; 1710 break; 1711 } 1712 1713 case FASTTRAP_T_JCC: 1714 { 1715 uint_t taken; 1716 1717 switch (tp->ftt_code) { 1718 case FASTTRAP_JO: 1719 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_OF) != 0; 1720 break; 1721 case FASTTRAP_JNO: 1722 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0; 1723 break; 1724 case FASTTRAP_JB: 1725 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_CF) != 0; 1726 break; 1727 case FASTTRAP_JAE: 1728 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_CF) == 0; 1729 break; 1730 case FASTTRAP_JE: 1731 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) != 0; 1732 break; 1733 case FASTTRAP_JNE: 1734 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) == 0; 1735 break; 1736 case FASTTRAP_JBE: 1737 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_CF) != 0 || 1738 (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) != 0; 1739 break; 1740 case FASTTRAP_JA: 1741 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_CF) == 0 && 1742 
(regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) == 0; 1743 break; 1744 case FASTTRAP_JS: 1745 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_SF) != 0; 1746 break; 1747 case FASTTRAP_JNS: 1748 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0; 1749 break; 1750 case FASTTRAP_JP: 1751 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_PF) != 0; 1752 break; 1753 case FASTTRAP_JNP: 1754 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_PF) == 0; 1755 break; 1756 case FASTTRAP_JL: 1757 taken = ((regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0) != 1758 ((regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0); 1759 break; 1760 case FASTTRAP_JGE: 1761 taken = ((regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0) == 1762 ((regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0); 1763 break; 1764 case FASTTRAP_JLE: 1765 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) != 0 || 1766 ((regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0) != 1767 ((regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0); 1768 break; 1769 case FASTTRAP_JG: 1770 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) == 0 && 1771 ((regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0) == 1772 ((regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0); 1773 break; 1774 default: 1775 taken = FALSE; 1776 } 1777 1778 if (taken) 1779 new_pc = tp->ftt_dest; 1780 else 1781 new_pc = pc + tp->ftt_size; 1782 break; 1783 } 1784 1785 case FASTTRAP_T_LOOP: 1786 { 1787 uint_t taken; 1788 uint64_t cx = regs64->rcx--; 1789 1790 switch (tp->ftt_code) { 1791 case FASTTRAP_LOOPNZ: 1792 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) == 0 && 1793 cx != 0; 1794 break; 1795 case FASTTRAP_LOOPZ: 1796 taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) != 0 && 1797 cx != 0; 1798 break; 1799 case FASTTRAP_LOOP: 1800 taken = (cx != 0); 1801 break; 1802 default: 1803 taken = FALSE; 1804 } 1805 1806 if (taken) 1807 new_pc = tp->ftt_dest; 1808 else 1809 new_pc = pc + tp->ftt_size; 1810 break; 1811 } 1812 1813 case FASTTRAP_T_JCXZ: 1814 { 1815 uint64_t cx = regs64->rcx; 1816 1817 if (cx == 0) 1818 
new_pc = tp->ftt_dest; 1819 else 1820 new_pc = pc + tp->ftt_size; 1821 break; 1822 } 1823 1824 case FASTTRAP_T_PUSHL_EBP: 1825 { 1826 user_addr_t addr = regs64->isf.rsp - sizeof (uint64_t); 1827 int ret = fasttrap_suword64(addr, (uint64_t)regs64->rbp); 1828 1829 if (ret == -1) { 1830 fasttrap_sigsegv(p, uthread, addr); 1831 new_pc = pc; 1832 break; 1833 } 1834 1835 regs64->isf.rsp = addr; 1836 new_pc = pc + tp->ftt_size; 1837 break; 1838 } 1839 1840 case FASTTRAP_T_NOP: 1841 new_pc = pc + tp->ftt_size; 1842 break; 1843 1844 case FASTTRAP_T_JMP: 1845 case FASTTRAP_T_CALL: 1846 if (tp->ftt_code == 0) { 1847 new_pc = tp->ftt_dest; 1848 } else { 1849 user_addr_t value, addr = tp->ftt_dest; 1850 1851 if (tp->ftt_base != FASTTRAP_NOREG) 1852 addr += fasttrap_getreg(regs, tp->ftt_base); 1853 if (tp->ftt_index != FASTTRAP_NOREG) 1854 addr += fasttrap_getreg(regs, tp->ftt_index) << 1855 tp->ftt_scale; 1856 1857 if (tp->ftt_code == 1) { 1858 /* 1859 * If there's a segment prefix for this 1860 * instruction, we'll need to check permissions 1861 * and bounds on the given selector, and adjust 1862 * the address accordingly. 1863 */ 1864 if (tp->ftt_segment != FASTTRAP_SEG_NONE && 1865 fasttrap_do_seg(tp, regs, &addr) != 0) { 1866 fasttrap_sigsegv(p, uthread, addr); 1867 new_pc = pc; 1868 break; 1869 } 1870 1871 if (fasttrap_fuword64(addr, &value) == -1) { 1872 fasttrap_sigsegv(p, uthread, addr); 1873 new_pc = pc; 1874 break; 1875 } 1876 new_pc = value; 1877 } else { 1878 new_pc = addr; 1879 } 1880 } 1881 1882 /* 1883 * If this is a call instruction, we need to push the return 1884 * address onto the stack. If this fails, we send the process 1885 * a SIGSEGV and reset the pc to emulate what would happen if 1886 * this instruction weren't traced. 
1887 */ 1888 if (tp->ftt_type == FASTTRAP_T_CALL) { 1889 user_addr_t addr = regs64->isf.rsp - sizeof (uint64_t); 1890 int ret = fasttrap_suword64(addr, pc + tp->ftt_size); 1891 1892 if (ret == -1) { 1893 fasttrap_sigsegv(p, uthread, addr); 1894 new_pc = pc; 1895 break; 1896 } 1897 1898 regs64->isf.rsp = addr; 1899 } 1900 break; 1901 1902 case FASTTRAP_T_COMMON: 1903 { 1904 user_addr_t addr; 1905 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; 1906 uint_t i = 0; 1907 1908 /* 1909 * Generic Instruction Tracing 1910 * --------------------------- 1911 * 1912 * This is the layout of the scratch space in the user-land 1913 * thread structure for our generated instructions. 1914 * 1915 * 32-bit mode bytes 1916 * ------------------------ ----- 1917 * a: <original instruction> <= 15 1918 * jmp <pc + tp->ftt_size> 5 1919 * b: <original instrction> <= 15 1920 * int T_DTRACE_RET 2 1921 * ----- 1922 * <= 37 1923 * 1924 * 64-bit mode bytes 1925 * ------------------------ ----- 1926 * a: <original instruction> <= 15 1927 * jmp 0(%rip) 6 1928 * <pc + tp->ftt_size> 8 1929 * b: <original instruction> <= 15 1930 * int T_DTRACE_RET 2 1931 * ----- 1932 * <= 46 1933 * 1934 * The %pc is set to a, and curthread->t_dtrace_astpc is set 1935 * to b. If we encounter a signal on the way out of the 1936 * kernel, trap() will set %pc to curthread->t_dtrace_astpc 1937 * so that we execute the original instruction and re-enter 1938 * the kernel rather than redirecting to the next instruction. 1939 * 1940 * If there are return probes (so we know that we're going to 1941 * need to reenter the kernel after executing the original 1942 * instruction), the scratch space will just contain the 1943 * original instruction followed by an interrupt -- the same 1944 * data as at b. 1945 * 1946 * %rip-relative Addressing 1947 * ------------------------ 1948 * 1949 * There's a further complication in 64-bit mode due to %rip- 1950 * relative addressing. 
While this is clearly a beneficial 1951 * architectural decision for position independent code, it's 1952 * hard not to see it as a personal attack against the pid 1953 * provider since before there was a relatively small set of 1954 * instructions to emulate; with %rip-relative addressing, 1955 * almost every instruction can potentially depend on the 1956 * address at which it's executed. Rather than emulating 1957 * the broad spectrum of instructions that can now be 1958 * position dependent, we emulate jumps and others as in 1959 * 32-bit mode, and take a different tack for instructions 1960 * using %rip-relative addressing. 1961 * 1962 * For every instruction that uses the ModRM byte, the 1963 * in-kernel disassembler reports its location. We use the 1964 * ModRM byte to identify that an instruction uses 1965 * %rip-relative addressing and to see what other registers 1966 * the instruction uses. To emulate those instructions, 1967 * we modify the instruction to be %rax-relative rather than 1968 * %rip-relative (or %rcx-relative if the instruction uses 1969 * %rax; or %r8- or %r9-relative if the REX.B is present so 1970 * we don't have to rewrite the REX prefix). We then load 1971 * the value that %rip would have been into the scratch 1972 * register and generate an instruction to reset the scratch 1973 * register back to its original value. The instruction 1974 * sequence looks like this: 1975 * 1976 * 64-mode %rip-relative bytes 1977 * ------------------------ ----- 1978 * a: <modified instruction> <= 15 1979 * movq $<value>, %<scratch> 6 1980 * jmp 0(%rip) 6 1981 * <pc + tp->ftt_size> 8 1982 * b: <modified instruction> <= 15 1983 * int T_DTRACE_RET 2 1984 * ----- 1985 * 52 1986 * 1987 * We set curthread->t_dtrace_regv so that upon receiving 1988 * a signal we can reset the value of the scratch register. 
1989 */ 1990 1991 addr = uthread->t_dtrace_scratch->addr; 1992 1993 if (addr == 0LL) { 1994 fasttrap_sigtrap(p, uthread, pc); // Should be killing target proc 1995 new_pc = pc; 1996 break; 1997 } 1998 1999 ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE); 2000 2001 uthread->t_dtrace_scrpc = addr; 2002 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); 2003 i += tp->ftt_size; 2004 2005 if (tp->ftt_ripmode != 0) { 2006 uint64_t* reg; 2007 2008 ASSERT(tp->ftt_ripmode & 2009 (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); 2010 2011 /* 2012 * If this was a %rip-relative instruction, we change 2013 * it to be either a %rax- or %rcx-relative 2014 * instruction (depending on whether those registers 2015 * are used as another operand; or %r8- or %r9- 2016 * relative depending on the value of REX.B). We then 2017 * set that register and generate a movq instruction 2018 * to reset the value. 2019 */ 2020 if (tp->ftt_ripmode & FASTTRAP_RIP_X) 2021 scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); 2022 else 2023 scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); 2024 2025 if (tp->ftt_ripmode & FASTTRAP_RIP_1) 2026 scratch[i++] = FASTTRAP_MOV_EAX; 2027 else 2028 scratch[i++] = FASTTRAP_MOV_ECX; 2029 2030 switch (tp->ftt_ripmode) { 2031 case FASTTRAP_RIP_1: 2032 reg = ®s64->rax; 2033 uthread->t_dtrace_reg = REG_RAX; 2034 break; 2035 case FASTTRAP_RIP_2: 2036 reg = ®s64->rcx; 2037 uthread->t_dtrace_reg = REG_RCX; 2038 break; 2039 case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: 2040 reg = ®s64->r8; 2041 uthread->t_dtrace_reg = REG_R8; 2042 break; 2043 case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: 2044 reg = ®s64->r9; 2045 uthread->t_dtrace_reg = REG_R9; 2046 break; 2047 default: 2048 reg = NULL; 2049 panic("unhandled ripmode in fasttrap_pid_probe64"); 2050 } 2051 2052 /* LINTED - alignment */ 2053 *(uint64_t *)&scratch[i] = *reg; 2054 uthread->t_dtrace_regv = *reg; 2055 *reg = pc + tp->ftt_size; 2056 i += sizeof (uint64_t); 2057 } 2058 2059 /* 2060 * Generate the branch instruction to what would have 2061 * normally been the 
subsequent instruction. In 32-bit mode, 2062 * this is just a relative branch; in 64-bit mode this is a 2063 * %rip-relative branch that loads the 64-bit pc value 2064 * immediately after the jmp instruction. 2065 */ 2066 scratch[i++] = FASTTRAP_GROUP5_OP; 2067 scratch[i++] = FASTTRAP_MODRM(0, 4, 5); 2068 /* LINTED - alignment */ 2069 *(uint32_t *)&scratch[i] = 0; 2070 i += sizeof (uint32_t); 2071 /* LINTED - alignment */ 2072 *(uint64_t *)&scratch[i] = pc + tp->ftt_size; 2073 i += sizeof (uint64_t); 2074 2075 uthread->t_dtrace_astpc = addr + i; 2076 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); 2077 i += tp->ftt_size; 2078 scratch[i++] = FASTTRAP_INT; 2079 scratch[i++] = T_DTRACE_RET; 2080 2081 ASSERT(i <= sizeof (scratch)); 2082 2083 if (fasttrap_copyout(scratch, addr, i)) { 2084 fasttrap_sigtrap(p, uthread, pc); 2085 new_pc = pc; 2086 break; 2087 } 2088 2089 if (tp->ftt_retids != NULL) { 2090 uthread->t_dtrace_step = 1; 2091 uthread->t_dtrace_ret = 1; 2092 new_pc = uthread->t_dtrace_astpc; 2093 } else { 2094 new_pc = uthread->t_dtrace_scrpc; 2095 } 2096 2097 uthread->t_dtrace_pc = pc; 2098 uthread->t_dtrace_npc = pc + tp->ftt_size; 2099 uthread->t_dtrace_on = 1; 2100 break; 2101 } 2102 2103 default: 2104 panic("fasttrap: mishandled an instruction"); 2105 } 2106 2107done: 2108 /* 2109 * APPLE NOTE: 2110 * 2111 * We're setting this earlier than Solaris does, to get a "correct" 2112 * ustack() output. In the Sun code, a() -> b() -> c() -> d() is 2113 * reported at: d, b, a. The new way gives c, b, a, which is closer 2114 * to correct, as the return instruction has already exectued. 2115 */ 2116 regs64->isf.rip = new_pc; 2117 2118 2119 /* 2120 * If there were no return probes when we first found the tracepoint, 2121 * we should feel no obligation to honor any return probes that were 2122 * subsequently enabled -- they'll just have to wait until the next 2123 * time around. 
2124 */ 2125 if (tp->ftt_retids != NULL) { 2126 /* 2127 * We need to wait until the results of the instruction are 2128 * apparent before invoking any return probes. If this 2129 * instruction was emulated we can just call 2130 * fasttrap_return_common(); if it needs to be executed, we 2131 * need to wait until the user thread returns to the kernel. 2132 */ 2133 if (tp->ftt_type != FASTTRAP_T_COMMON) { 2134 fasttrap_return_common(regs, pc, pid, new_pc); 2135 } else { 2136 ASSERT(uthread->t_dtrace_ret != 0); 2137 ASSERT(uthread->t_dtrace_pc == pc); 2138 ASSERT(uthread->t_dtrace_scrpc != 0); 2139 ASSERT(new_pc == uthread->t_dtrace_astpc); 2140 } 2141 } 2142 2143 return (0); 2144} 2145 2146int 2147fasttrap_pid_probe(x86_saved_state_t *regs) 2148{ 2149 if (is_saved_state64(regs)) 2150 return fasttrap_pid_probe64(regs); 2151 2152 return fasttrap_pid_probe32(regs); 2153} 2154 2155int 2156fasttrap_return_probe(x86_saved_state_t *regs) 2157{ 2158 x86_saved_state64_t *regs64; 2159 x86_saved_state32_t *regs32; 2160 unsigned int p_model; 2161 2162 if (is_saved_state64(regs)) { 2163 regs64 = saved_state64(regs); 2164 regs32 = NULL; 2165 p_model = DATAMODEL_LP64; 2166 } else { 2167 regs64 = NULL; 2168 regs32 = saved_state32(regs); 2169 p_model = DATAMODEL_ILP32; 2170 } 2171 2172 proc_t *p = current_proc(); 2173 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 2174 user_addr_t pc = uthread->t_dtrace_pc; 2175 user_addr_t npc = uthread->t_dtrace_npc; 2176 2177 uthread->t_dtrace_pc = 0; 2178 uthread->t_dtrace_npc = 0; 2179 uthread->t_dtrace_scrpc = 0; 2180 uthread->t_dtrace_astpc = 0; 2181 2182 /* 2183 * Treat a child created by a call to vfork(2) as if it were its 2184 * parent. We know that there's only one thread of control in such a 2185 * process: this one. 2186 */ 2187 /* 2188 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal" 2189 * How do we assert this? 
2190 */ 2191 while (p->p_lflag & P_LINVFORK) { 2192 p = p->p_pptr; 2193 } 2194 2195 /* 2196 * We set rp->r_pc to the address of the traced instruction so 2197 * that it appears to dtrace_probe() that we're on the original 2198 * instruction, and so that the user can't easily detect our 2199 * complex web of lies. dtrace_return_probe() (our caller) 2200 * will correctly set %pc after we return. 2201 */ 2202 if (p_model == DATAMODEL_LP64) 2203 regs64->isf.rip = pc; 2204 else 2205 regs32->eip = pc; 2206 2207 fasttrap_return_common(regs, pc, p->p_pid, npc); 2208 2209 return (0); 2210} 2211 2212uint64_t 2213fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, 2214 int aframes) 2215{ 2216 pal_register_cache_state(current_thread(), VALID); 2217#pragma unused(arg, id, parg, aframes) 2218 return (fasttrap_anarg((x86_saved_state_t *)find_user_regs(current_thread()), 1, argno)); 2219} 2220 2221uint64_t 2222fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, 2223 int aframes) 2224{ 2225 pal_register_cache_state(current_thread(), VALID); 2226#pragma unused(arg, id, parg, aframes) 2227 return (fasttrap_anarg((x86_saved_state_t *)find_user_regs(current_thread()), 0, argno)); 2228} 2229 2230/* 2231 * APPLE NOTE: See comments by regmap array definition. We are cheating 2232 * when returning 32 bit registers. 
2233 */ 2234static user_addr_t 2235fasttrap_getreg(x86_saved_state_t *regs, uint_t reg) 2236{ 2237 if (is_saved_state64(regs)) { 2238 x86_saved_state64_t *regs64 = saved_state64(regs); 2239 2240 switch (reg) { 2241 case REG_RAX: return regs64->rax; 2242 case REG_RCX: return regs64->rcx; 2243 case REG_RDX: return regs64->rdx; 2244 case REG_RBX: return regs64->rbx; 2245 case REG_RSP: return regs64->isf.rsp; 2246 case REG_RBP: return regs64->rbp; 2247 case REG_RSI: return regs64->rsi; 2248 case REG_RDI: return regs64->rdi; 2249 case REG_R8: return regs64->r8; 2250 case REG_R9: return regs64->r9; 2251 case REG_R10: return regs64->r10; 2252 case REG_R11: return regs64->r11; 2253 case REG_R12: return regs64->r12; 2254 case REG_R13: return regs64->r13; 2255 case REG_R14: return regs64->r14; 2256 case REG_R15: return regs64->r15; 2257 case REG_TRAPNO: return regs64->isf.trapno; 2258 case REG_ERR: return regs64->isf.err; 2259 case REG_RIP: return regs64->isf.rip; 2260 case REG_CS: return regs64->isf.cs; 2261 case REG_RFL: return regs64->isf.rflags; 2262 case REG_SS: return regs64->isf.ss; 2263 case REG_FS: return regs64->fs; 2264 case REG_GS: return regs64->gs; 2265 case REG_ES: 2266 case REG_DS: 2267 case REG_FSBASE: 2268 case REG_GSBASE: 2269 // Important to distinguish these requests (which should be legal) from other values. 2270 panic("dtrace: unimplemented x86_64 getreg()"); 2271 } 2272 2273 panic("dtrace: unhandled x86_64 getreg() constant"); 2274 } else { 2275 x86_saved_state32_t *regs32 = saved_state32(regs); 2276 2277 switch (reg) { 2278 case REG_RAX: return regs32->eax; 2279 case REG_RCX: return regs32->ecx; 2280 case REG_RDX: return regs32->edx; 2281 case REG_RBX: return regs32->ebx; 2282 case REG_RSP: return regs32->uesp; 2283 case REG_RBP: return regs32->ebp; 2284 case REG_RSI: return regs32->esi; 2285 case REG_RDI: return regs32->edi; 2286 } 2287 2288 panic("dtrace: unhandled i386 getreg() constant"); 2289 } 2290 2291 return 0; 2292} 2293