1/* 2 * Kernel Probes (KProbes) 3 * arch/x86_64/kernel/kprobes.c 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * 19 * Copyright (C) IBM Corporation, 2002, 2004 20 * 21 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel 22 * Probes initial implementation ( includes contributions from 23 * Rusty Russell). 24 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes 25 * interface to access function arguments. 26 * 2004-Oct Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi 27 * <prasanna@in.ibm.com> adapted for x86_64 28 * 2005-Mar Roland McGrath <roland@redhat.com> 29 * Fixed to handle %rip-relative addressing mode correctly. 30 * 2005-May Rusty Lynch <rusty.lynch@intel.com> 31 * Added function return probes functionality 32 */ 33 34#include <linux/kprobes.h> 35#include <linux/ptrace.h> 36#include <linux/string.h> 37#include <linux/slab.h> 38#include <linux/preempt.h> 39#include <linux/module.h> 40#include <linux/kdebug.h> 41 42#include <asm/cacheflush.h> 43#include <asm/pgtable.h> 44#include <asm/uaccess.h> 45 46void jprobe_return_end(void); 47static void __kprobes arch_copy_kprobe(struct kprobe *p); 48 49DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; 50DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); 51 52/* 53 * returns non-zero if opcode modifies the interrupt flag. 54 */ 55static __always_inline int is_IF_modifier(kprobe_opcode_t *insn) 56{ 57 switch (*insn) { 58 case 0xfa: /* cli */ 59 case 0xfb: /* sti */ 60 case 0xcf: /* iret/iretd */ 61 case 0x9d: /* popf/popfd */ 62 return 1; 63 } 64 65 if (*insn >= 0x40 && *insn <= 0x4f && *++insn == 0xcf) 66 return 1; 67 return 0; 68} 69 70int __kprobes arch_prepare_kprobe(struct kprobe *p) 71{ 72 /* insn: must be on special executable page on x86_64. */ 73 p->ainsn.insn = get_insn_slot(); 74 if (!p->ainsn.insn) { 75 return -ENOMEM; 76 } 77 arch_copy_kprobe(p); 78 return 0; 79} 80 81/* 82 * Determine if the instruction uses the %rip-relative addressing mode. 83 * If it does, return the address of the 32-bit displacement word. 84 * If not, return null. 85 */ 86static s32 __kprobes *is_riprel(u8 *insn) 87{ 88#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ 89 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ 90 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ 91 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ 92 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ 93 << (row % 64)) 94 static const u64 onebyte_has_modrm[256 / 64] = { 95 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 96 /* ------------------------------- */ 97 W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 00 */ 98 W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 10 */ 99 W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* 20 */ 100 W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */ 101 W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */ 102 W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 50 */ 103 W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)| /* 60 */ 104 W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */ 105 W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ 106 W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 90 */ 107 W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* a0 */ 108 W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */ 109 W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)| /* c0 */ 110 W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* d0 */ 111 W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* e0 */ 112 W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1) /* f0 */ 113 /* ------------------------------- */ 114 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 115 }; 116 static const u64 twobyte_has_modrm[256 / 64] = { 117 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 118 /* ------------------------------- */ 119 W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)| /* 0f */ 120 W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)| /* 1f */ 121 W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)| /* 2f */ 122 W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */ 123 W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 4f */ 124 W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 5f */ 125 W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 6f */ 126 W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */ 127 W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 8f */ 128 W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 9f */ 129 W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)| /* af */ 130 W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */ 131 W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)| /* cf */ 132 W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* df */ 133 W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* ef */ 134 W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* ff */ 135 /* ------------------------------- */ 136 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 137 }; 138#undef W 139 int need_modrm; 140 141 /* Skip legacy instruction prefixes. */ 142 while (1) { 143 switch (*insn) { 144 case 0x66: 145 case 0x67: 146 case 0x2e: 147 case 0x3e: 148 case 0x26: 149 case 0x64: 150 case 0x65: 151 case 0x36: 152 case 0xf0: 153 case 0xf3: 154 case 0xf2: 155 ++insn; 156 continue; 157 } 158 break; 159 } 160 161 /* Skip REX instruction prefix. */ 162 if ((*insn & 0xf0) == 0x40) 163 ++insn; 164 165 if (*insn == 0x0f) { /* Two-byte opcode. */ 166 ++insn; 167 need_modrm = test_bit(*insn, twobyte_has_modrm); 168 } else { /* One-byte opcode. */ 169 need_modrm = test_bit(*insn, onebyte_has_modrm); 170 } 171 172 if (need_modrm) { 173 u8 modrm = *++insn; 174 if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */ 175 /* Displacement follows ModRM byte. */ 176 return (s32 *) ++insn; 177 } 178 } 179 180 /* No %rip-relative addressing mode here. */ 181 return NULL; 182} 183 184static void __kprobes arch_copy_kprobe(struct kprobe *p) 185{ 186 s32 *ripdisp; 187 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); 188 ripdisp = is_riprel(p->ainsn.insn); 189 if (ripdisp) { 190 /* 191 * The copied instruction uses the %rip-relative 192 * addressing mode. Adjust the displacement for the 193 * difference between the original location of this 194 * instruction and the location of the copy that will 195 * actually be run. The tricky bit here is making sure 196 * that the sign extension happens correctly in this 197 * calculation, since we need a signed 32-bit result to 198 * be sign-extended to 64 bits when it's added to the 199 * %rip value and yield the same 64-bit result that the 200 * sign-extension of the original signed 32-bit 201 * displacement would have given. 202 */ 203 s64 disp = (u8 *) p->addr + *ripdisp - (u8 *) p->ainsn.insn; 204 BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ 205 *ripdisp = disp; 206 } 207 p->opcode = *p->addr; 208} 209 210void __kprobes arch_arm_kprobe(struct kprobe *p) 211{ 212 *p->addr = BREAKPOINT_INSTRUCTION; 213 flush_icache_range((unsigned long) p->addr, 214 (unsigned long) p->addr + sizeof(kprobe_opcode_t)); 215} 216 217void __kprobes arch_disarm_kprobe(struct kprobe *p) 218{ 219 *p->addr = p->opcode; 220 flush_icache_range((unsigned long) p->addr, 221 (unsigned long) p->addr + sizeof(kprobe_opcode_t)); 222} 223 224void __kprobes arch_remove_kprobe(struct kprobe *p) 225{ 226 mutex_lock(&kprobe_mutex); 227 free_insn_slot(p->ainsn.insn, 0); 228 mutex_unlock(&kprobe_mutex); 229} 230 231static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) 232{ 233 kcb->prev_kprobe.kp = kprobe_running(); 234 kcb->prev_kprobe.status = kcb->kprobe_status; 235 kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags; 236 kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; 237} 238 239static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) 240{ 241 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 242 kcb->kprobe_status = kcb->prev_kprobe.status; 243 kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags; 244 kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; 245} 246 247static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 248 struct kprobe_ctlblk *kcb) 249{ 250 __get_cpu_var(current_kprobe) = p; 251 kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags 252 = (regs->eflags & (TF_MASK | IF_MASK)); 253 if (is_IF_modifier(p->ainsn.insn)) 254 kcb->kprobe_saved_rflags &= ~IF_MASK; 255} 256 257static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) 258{ 259 regs->eflags |= TF_MASK; 260 regs->eflags &= ~IF_MASK; 261 /*single step inline if the instruction is an int3*/ 262 if (p->opcode == BREAKPOINT_INSTRUCTION) 263 regs->rip = (unsigned long)p->addr; 264 else 265 regs->rip = (unsigned long)p->ainsn.insn; 266} 267 268/* Called with kretprobe_lock held */ 269void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 270 struct pt_regs *regs) 271{ 272 unsigned long *sara = (unsigned long *)regs->rsp; 273 274 ri->ret_addr = (kprobe_opcode_t *) *sara; 275 /* Replace the return addr with trampoline addr */ 276 *sara = (unsigned long) &kretprobe_trampoline; 277} 278 279int __kprobes kprobe_handler(struct pt_regs *regs) 280{ 281 struct kprobe *p; 282 int ret = 0; 283 kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); 284 struct kprobe_ctlblk *kcb; 285 286 /* 287 * We don't want to be preempted for the entire 288 * duration of kprobe processing 289 */ 290 preempt_disable(); 291 kcb = get_kprobe_ctlblk(); 292 293 /* Check we're not actually recursing */ 294 if (kprobe_running()) { 295 p = get_kprobe(addr); 296 if (p) { 297 if (kcb->kprobe_status == KPROBE_HIT_SS && 298 *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { 299 regs->eflags &= ~TF_MASK; 300 regs->eflags |= kcb->kprobe_saved_rflags; 301 goto no_kprobe; 302 } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) { 303 /* TODO: Provide re-entrancy from 304 * post_kprobes_handler() and avoid exception 305 * stack corruption while single-stepping on 306 * the instruction of the new probe. 307 */ 308 arch_disarm_kprobe(p); 309 regs->rip = (unsigned long)p->addr; 310 reset_current_kprobe(); 311 ret = 1; 312 } else { 313 /* We have reentered the kprobe_handler(), since 314 * another probe was hit while within the 315 * handler. We here save the original kprobe 316 * variables and just single step on instruction 317 * of the new probe without calling any user 318 * handlers. 319 */ 320 save_previous_kprobe(kcb); 321 set_current_kprobe(p, regs, kcb); 322 kprobes_inc_nmissed_count(p); 323 prepare_singlestep(p, regs); 324 kcb->kprobe_status = KPROBE_REENTER; 325 return 1; 326 } 327 } else { 328 if (*addr != BREAKPOINT_INSTRUCTION) { 329 /* The breakpoint instruction was removed by 330 * another cpu right after we hit, no further 331 * handling of this interrupt is appropriate 332 */ 333 regs->rip = (unsigned long)addr; 334 ret = 1; 335 goto no_kprobe; 336 } 337 p = __get_cpu_var(current_kprobe); 338 if (p->break_handler && p->break_handler(p, regs)) { 339 goto ss_probe; 340 } 341 } 342 goto no_kprobe; 343 } 344 345 p = get_kprobe(addr); 346 if (!p) { 347 if (*addr != BREAKPOINT_INSTRUCTION) { 348 /* 349 * The breakpoint instruction was removed right 350 * after we hit it. Another cpu has removed 351 * either a probepoint or a debugger breakpoint 352 * at this address. In either case, no further 353 * handling of this interrupt is appropriate. 354 * Back up over the (now missing) int3 and run 355 * the original instruction. 356 */ 357 regs->rip = (unsigned long)addr; 358 ret = 1; 359 } 360 /* Not one of ours: let kernel handle it */ 361 goto no_kprobe; 362 } 363 364 set_current_kprobe(p, regs, kcb); 365 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 366 367 if (p->pre_handler && p->pre_handler(p, regs)) 368 /* handler has already set things up, so skip ss setup */ 369 return 1; 370 371ss_probe: 372 prepare_singlestep(p, regs); 373 kcb->kprobe_status = KPROBE_HIT_SS; 374 return 1; 375 376no_kprobe: 377 preempt_enable_no_resched(); 378 return ret; 379} 380 381/* 382 * For function-return probes, init_kprobes() establishes a probepoint 383 * here. When a retprobed function returns, this probe is hit and 384 * trampoline_probe_handler() runs, calling the kretprobe's handler. 385 */ 386 void kretprobe_trampoline_holder(void) 387 { 388 asm volatile ( ".global kretprobe_trampoline\n" 389 "kretprobe_trampoline: \n" 390 "nop\n"); 391 } 392 393/* 394 * Called when we hit the probe point at kretprobe_trampoline 395 */ 396int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) 397{ 398 struct kretprobe_instance *ri = NULL; 399 struct hlist_head *head, empty_rp; 400 struct hlist_node *node, *tmp; 401 unsigned long flags, orig_ret_address = 0; 402 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; 403 404 INIT_HLIST_HEAD(&empty_rp); 405 spin_lock_irqsave(&kretprobe_lock, flags); 406 head = kretprobe_inst_table_head(current); 407 408 /* 409 * It is possible to have multiple instances associated with a given 410 * task either because an multiple functions in the call path 411 * have a return probe installed on them, and/or more then one return 412 * return probe was registered for a target function. 413 * 414 * We can handle this because: 415 * - instances are always inserted at the head of the list 416 * - when multiple return probes are registered for the same 417 * function, the first instance's ret_addr will point to the 418 * real return address, and all the rest will point to 419 * kretprobe_trampoline 420 */ 421 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 422 if (ri->task != current) 423 /* another task is sharing our hash bucket */ 424 continue; 425 426 if (ri->rp && ri->rp->handler) 427 ri->rp->handler(ri, regs); 428 429 orig_ret_address = (unsigned long)ri->ret_addr; 430 recycle_rp_inst(ri, &empty_rp); 431 432 if (orig_ret_address != trampoline_address) 433 /* 434 * This is the real return address. Any other 435 * instances associated with this task are for 436 * other calls deeper on the call stack 437 */ 438 break; 439 } 440 441 kretprobe_assert(ri, orig_ret_address, trampoline_address); 442 regs->rip = orig_ret_address; 443 444 reset_current_kprobe(); 445 spin_unlock_irqrestore(&kretprobe_lock, flags); 446 preempt_enable_no_resched(); 447 448 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 449 hlist_del(&ri->hlist); 450 kfree(ri); 451 } 452 /* 453 * By returning a non-zero value, we are telling 454 * kprobe_handler() that we don't want the post_handler 455 * to run (and have re-enabled preemption) 456 */ 457 return 1; 458} 459 460/* 461 * Called after single-stepping. p->addr is the address of the 462 * instruction whose first byte has been replaced by the "int 3" 463 * instruction. To avoid the SMP problems that can occur when we 464 * temporarily put back the original opcode to single-step, we 465 * single-stepped a copy of the instruction. The address of this 466 * copy is p->ainsn.insn. 467 * 468 * This function prepares to return from the post-single-step 469 * interrupt. We have to fix up the stack as follows: 470 * 471 * 0) Except in the case of absolute or indirect jump or call instructions, 472 * the new rip is relative to the copied instruction. We need to make 473 * it relative to the original instruction. 474 * 475 * 1) If the single-stepped instruction was pushfl, then the TF and IF 476 * flags are set in the just-pushed eflags, and may need to be cleared. 477 * 478 * 2) If the single-stepped instruction was a call, the return address 479 * that is atop the stack is the address following the copied instruction. 480 * We need to make it the address following the original instruction. 481 */ 482static void __kprobes resume_execution(struct kprobe *p, 483 struct pt_regs *regs, struct kprobe_ctlblk *kcb) 484{ 485 unsigned long *tos = (unsigned long *)regs->rsp; 486 unsigned long next_rip = 0; 487 unsigned long copy_rip = (unsigned long)p->ainsn.insn; 488 unsigned long orig_rip = (unsigned long)p->addr; 489 kprobe_opcode_t *insn = p->ainsn.insn; 490 491 /*skip the REX prefix*/ 492 if (*insn >= 0x40 && *insn <= 0x4f) 493 insn++; 494 495 switch (*insn) { 496 case 0x9c: /* pushfl */ 497 *tos &= ~(TF_MASK | IF_MASK); 498 *tos |= kcb->kprobe_old_rflags; 499 break; 500 case 0xc3: /* ret/lret */ 501 case 0xcb: 502 case 0xc2: 503 case 0xca: 504 regs->eflags &= ~TF_MASK; 505 /* rip is already adjusted, no more changes required*/ 506 return; 507 case 0xe8: /* call relative - Fix return addr */ 508 *tos = orig_rip + (*tos - copy_rip); 509 break; 510 case 0xff: 511 if ((insn[1] & 0x30) == 0x10) { 512 /* call absolute, indirect */ 513 /* Fix return addr; rip is correct. */ 514 next_rip = regs->rip; 515 *tos = orig_rip + (*tos - copy_rip); 516 } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ 517 ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ 518 /* rip is correct. */ 519 next_rip = regs->rip; 520 } 521 break; 522 case 0xea: /* jmp absolute -- rip is correct */ 523 next_rip = regs->rip; 524 break; 525 default: 526 break; 527 } 528 529 regs->eflags &= ~TF_MASK; 530 if (next_rip) { 531 regs->rip = next_rip; 532 } else { 533 regs->rip = orig_rip + (regs->rip - copy_rip); 534 } 535} 536 537int __kprobes post_kprobe_handler(struct pt_regs *regs) 538{ 539 struct kprobe *cur = kprobe_running(); 540 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 541 542 if (!cur) 543 return 0; 544 545 if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { 546 kcb->kprobe_status = KPROBE_HIT_SSDONE; 547 cur->post_handler(cur, regs, 0); 548 } 549 550 resume_execution(cur, regs, kcb); 551 regs->eflags |= kcb->kprobe_saved_rflags; 552 553 /* Restore the original saved kprobes variables and continue. */ 554 if (kcb->kprobe_status == KPROBE_REENTER) { 555 restore_previous_kprobe(kcb); 556 goto out; 557 } 558 reset_current_kprobe(); 559out: 560 preempt_enable_no_resched(); 561 562 /* 563 * if somebody else is singlestepping across a probe point, eflags 564 * will have TF set, in which case, continue the remaining processing 565 * of do_debug, as if this is not a probe hit. 566 */ 567 if (regs->eflags & TF_MASK) 568 return 0; 569 570 return 1; 571} 572 573int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) 574{ 575 struct kprobe *cur = kprobe_running(); 576 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 577 const struct exception_table_entry *fixup; 578 579 switch(kcb->kprobe_status) { 580 case KPROBE_HIT_SS: 581 case KPROBE_REENTER: 582 /* 583 * We are here because the instruction being single 584 * stepped caused a page fault. We reset the current 585 * kprobe and the rip points back to the probe address 586 * and allow the page fault handler to continue as a 587 * normal page fault. 588 */ 589 regs->rip = (unsigned long)cur->addr; 590 regs->eflags |= kcb->kprobe_old_rflags; 591 if (kcb->kprobe_status == KPROBE_REENTER) 592 restore_previous_kprobe(kcb); 593 else 594 reset_current_kprobe(); 595 preempt_enable_no_resched(); 596 break; 597 case KPROBE_HIT_ACTIVE: 598 case KPROBE_HIT_SSDONE: 599 /* 600 * We increment the nmissed count for accounting, 601 * we can also use npre/npostfault count for accouting 602 * these specific fault cases. 603 */ 604 kprobes_inc_nmissed_count(cur); 605 606 /* 607 * We come here because instructions in the pre/post 608 * handler caused the page_fault, this could happen 609 * if handler tries to access user space by 610 * copy_from_user(), get_user() etc. Let the 611 * user-specified handler try to fix it first. 612 */ 613 if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) 614 return 1; 615 616 /* 617 * In case the user-specified fault handler returned 618 * zero, try to fix up. 619 */ 620 fixup = search_exception_tables(regs->rip); 621 if (fixup) { 622 regs->rip = fixup->fixup; 623 return 1; 624 } 625 626 /* 627 * fixup() could not handle it, 628 * Let do_page_fault() fix it. 629 */ 630 break; 631 default: 632 break; 633 } 634 return 0; 635} 636 637/* 638 * Wrapper routine for handling exceptions. 639 */ 640int __kprobes kprobe_exceptions_notify(struct notifier_block *self, 641 unsigned long val, void *data) 642{ 643 struct die_args *args = (struct die_args *)data; 644 int ret = NOTIFY_DONE; 645 646 if (args->regs && user_mode(args->regs)) 647 return ret; 648 649 switch (val) { 650 case DIE_INT3: 651 if (kprobe_handler(args->regs)) 652 ret = NOTIFY_STOP; 653 break; 654 case DIE_DEBUG: 655 if (post_kprobe_handler(args->regs)) 656 ret = NOTIFY_STOP; 657 break; 658 case DIE_GPF: 659 case DIE_PAGE_FAULT: 660 /* kprobe_running() needs smp_processor_id() */ 661 preempt_disable(); 662 if (kprobe_running() && 663 kprobe_fault_handler(args->regs, args->trapnr)) 664 ret = NOTIFY_STOP; 665 preempt_enable(); 666 break; 667 default: 668 break; 669 } 670 return ret; 671} 672 673int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) 674{ 675 struct jprobe *jp = container_of(p, struct jprobe, kp); 676 unsigned long addr; 677 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 678 679 kcb->jprobe_saved_regs = *regs; 680 kcb->jprobe_saved_rsp = (long *) regs->rsp; 681 addr = (unsigned long)(kcb->jprobe_saved_rsp); 682 /* 683 * As Linus pointed out, gcc assumes that the callee 684 * owns the argument space and could overwrite it, e.g. 685 * tailcall optimization. So, to be absolutely safe 686 * we also save and restore enough stack bytes to cover 687 * the argument area. 688 */ 689 memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, 690 MIN_STACK_SIZE(addr)); 691 regs->eflags &= ~IF_MASK; 692 regs->rip = (unsigned long)(jp->entry); 693 return 1; 694} 695 696void __kprobes jprobe_return(void) 697{ 698 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 699 700 asm volatile (" xchg %%rbx,%%rsp \n" 701 " int3 \n" 702 " .globl jprobe_return_end \n" 703 " jprobe_return_end: \n" 704 " nop \n"::"b" 705 (kcb->jprobe_saved_rsp):"memory"); 706} 707 708int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 709{ 710 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 711 u8 *addr = (u8 *) (regs->rip - 1); 712 unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp); 713 struct jprobe *jp = container_of(p, struct jprobe, kp); 714 715 if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { 716 if ((long *)regs->rsp != kcb->jprobe_saved_rsp) { 717 struct pt_regs *saved_regs = 718 container_of(kcb->jprobe_saved_rsp, 719 struct pt_regs, rsp); 720 printk("current rsp %p does not match saved rsp %p\n", 721 (long *)regs->rsp, kcb->jprobe_saved_rsp); 722 printk("Saved registers for jprobe %p\n", jp); 723 show_registers(saved_regs); 724 printk("Current registers\n"); 725 show_registers(regs); 726 BUG(); 727 } 728 *regs = kcb->jprobe_saved_regs; 729 memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, 730 MIN_STACK_SIZE(stack_addr)); 731 preempt_enable_no_resched(); 732 return 1; 733 } 734 return 0; 735} 736 737static struct kprobe trampoline_p = { 738 .addr = (kprobe_opcode_t *) &kretprobe_trampoline, 739 .pre_handler = trampoline_probe_handler 740}; 741 742int __init arch_init_kprobes(void) 743{ 744 return register_kprobe(&trampoline_p); 745} 746 747int __kprobes arch_trampoline_kprobe(struct kprobe *p) 748{ 749 if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) 750 return 1; 751 752 return 0; 753} 754