1/* ptrace.c */ 2/* By Ross Biro 1/23/92 */ 3/* 4 * Pentium III FXSR, SSE support 5 * Gareth Hughes <gareth@valinux.com>, May 2000 6 * 7 * x86-64 port 2000-2002 Andi Kleen 8 */ 9 10#include <linux/kernel.h> 11#include <linux/sched.h> 12#include <linux/mm.h> 13#include <linux/smp.h> 14#include <linux/errno.h> 15#include <linux/ptrace.h> 16#include <linux/user.h> 17#include <linux/security.h> 18#include <linux/audit.h> 19#include <linux/seccomp.h> 20#include <linux/signal.h> 21 22#include <asm/uaccess.h> 23#include <asm/pgtable.h> 24#include <asm/system.h> 25#include <asm/processor.h> 26#include <asm/i387.h> 27#include <asm/debugreg.h> 28#include <asm/ldt.h> 29#include <asm/desc.h> 30#include <asm/proto.h> 31#include <asm/ia32.h> 32 33/* 34 * does not yet catch signals sent when the child dies. 35 * in exit.c or in signal.c. 36 */ 37 38/* 39 * Determines which flags the user has access to [1 = access, 0 = no access]. 40 * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). 41 * Also masks reserved bits (63-22, 15, 5, 3, 1). 42 */ 43#define FLAG_MASK 0x54dd5UL 44 45/* set's the trap flag. */ 46#define TRAP_FLAG 0x100UL 47 48/* 49 * eflags and offset of eflags on child stack.. 50 */ 51#define EFLAGS offsetof(struct pt_regs, eflags) 52#define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs))) 53 54/* 55 * this routine will get a word off of the processes privileged stack. 56 * the offset is how far from the base addr as stored in the TSS. 57 * this routine assumes that all the privileged stacks are in our 58 * data space. 59 */ 60static inline unsigned long get_stack_long(struct task_struct *task, int offset) 61{ 62 unsigned char *stack; 63 64 stack = (unsigned char *)task->thread.rsp0; 65 stack += offset; 66 return (*((unsigned long *)stack)); 67} 68 69/* 70 * this routine will put a word on the processes privileged stack. 71 * the offset is how far from the base addr as stored in the TSS. 72 * this routine assumes that all the privileged stacks are in our 73 * data space. 74 */ 75static inline long put_stack_long(struct task_struct *task, int offset, 76 unsigned long data) 77{ 78 unsigned char * stack; 79 80 stack = (unsigned char *) task->thread.rsp0; 81 stack += offset; 82 *(unsigned long *) stack = data; 83 return 0; 84} 85 86#define LDT_SEGMENT 4 87 88unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs) 89{ 90 unsigned long addr, seg; 91 92 addr = regs->rip; 93 seg = regs->cs & 0xffff; 94 95 /* 96 * We'll assume that the code segments in the GDT 97 * are all zero-based. That is largely true: the 98 * TLS segments are used for data, and the PNPBIOS 99 * and APM bios ones we just ignore here. 100 */ 101 if (seg & LDT_SEGMENT) { 102 u32 *desc; 103 unsigned long base; 104 105 down(&child->mm->context.sem); 106 desc = child->mm->context.ldt + (seg & ~7); 107 base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000); 108 109 /* 16-bit code segment? */ 110 if (!((desc[1] >> 22) & 1)) 111 addr &= 0xffff; 112 addr += base; 113 up(&child->mm->context.sem); 114 } 115 return addr; 116} 117 118static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) 119{ 120 int i, copied; 121 unsigned char opcode[15]; 122 unsigned long addr = convert_rip_to_linear(child, regs); 123 124 copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); 125 for (i = 0; i < copied; i++) { 126 switch (opcode[i]) { 127 /* popf and iret */ 128 case 0x9d: case 0xcf: 129 return 1; 130 131 /* CHECKME: 64 65 */ 132 133 /* opcode and address size prefixes */ 134 case 0x66: case 0x67: 135 continue; 136 /* irrelevant prefixes (segment overrides and repeats) */ 137 case 0x26: case 0x2e: 138 case 0x36: case 0x3e: 139 case 0x64: case 0x65: 140 case 0xf2: case 0xf3: 141 continue; 142 143 case 0x40 ... 0x4f: 144 if (regs->cs != __USER_CS) 145 /* 32-bit mode: register increment */ 146 return 0; 147 /* 64-bit mode: REX prefix */ 148 continue; 149 150 /* CHECKME: f2, f3 */ 151 152 /* 153 * pushf: NOTE! We should probably not let 154 * the user see the TF bit being set. But 155 * it's more pain than it's worth to avoid 156 * it, and a debugger could emulate this 157 * all in user space if it _really_ cares. 158 */ 159 case 0x9c: 160 default: 161 return 0; 162 } 163 } 164 return 0; 165} 166 167static void set_singlestep(struct task_struct *child) 168{ 169 struct pt_regs *regs = task_pt_regs(child); 170 171 /* 172 * Always set TIF_SINGLESTEP - this guarantees that 173 * we single-step system calls etc.. This will also 174 * cause us to set TF when returning to user mode. 175 */ 176 set_tsk_thread_flag(child, TIF_SINGLESTEP); 177 178 /* 179 * If TF was already set, don't do anything else 180 */ 181 if (regs->eflags & TRAP_FLAG) 182 return; 183 184 /* Set TF on the kernel stack.. */ 185 regs->eflags |= TRAP_FLAG; 186 187 /* 188 * ..but if TF is changed by the instruction we will trace, 189 * don't mark it as being "us" that set it, so that we 190 * won't clear it by hand later. 191 */ 192 if (is_setting_trap_flag(child, regs)) 193 return; 194 195 child->ptrace |= PT_DTRACE; 196} 197 198static void clear_singlestep(struct task_struct *child) 199{ 200 /* Always clear TIF_SINGLESTEP... */ 201 clear_tsk_thread_flag(child, TIF_SINGLESTEP); 202 203 /* But touch TF only if it was set by us.. */ 204 if (child->ptrace & PT_DTRACE) { 205 struct pt_regs *regs = task_pt_regs(child); 206 regs->eflags &= ~TRAP_FLAG; 207 child->ptrace &= ~PT_DTRACE; 208 } 209} 210 211/* 212 * Called by kernel/ptrace.c when detaching.. 213 * 214 * Make sure the single step bit is not set. 215 */ 216void ptrace_disable(struct task_struct *child) 217{ 218 clear_singlestep(child); 219} 220 221static int putreg(struct task_struct *child, 222 unsigned long regno, unsigned long value) 223{ 224 unsigned long tmp; 225 226 /* Some code in the 64bit emulation may not be 64bit clean. 227 Don't take any chances. */ 228 if (test_tsk_thread_flag(child, TIF_IA32)) 229 value &= 0xffffffff; 230 switch (regno) { 231 case offsetof(struct user_regs_struct,fs): 232 if (value && (value & 3) != 3) 233 return -EIO; 234 child->thread.fsindex = value & 0xffff; 235 return 0; 236 case offsetof(struct user_regs_struct,gs): 237 if (value && (value & 3) != 3) 238 return -EIO; 239 child->thread.gsindex = value & 0xffff; 240 return 0; 241 case offsetof(struct user_regs_struct,ds): 242 if (value && (value & 3) != 3) 243 return -EIO; 244 child->thread.ds = value & 0xffff; 245 return 0; 246 case offsetof(struct user_regs_struct,es): 247 if (value && (value & 3) != 3) 248 return -EIO; 249 child->thread.es = value & 0xffff; 250 return 0; 251 case offsetof(struct user_regs_struct,ss): 252 if ((value & 3) != 3) 253 return -EIO; 254 value &= 0xffff; 255 return 0; 256 case offsetof(struct user_regs_struct,fs_base): 257 if (value >= TASK_SIZE_OF(child)) 258 return -EIO; 259 child->thread.fs = value; 260 return 0; 261 case offsetof(struct user_regs_struct,gs_base): 262 if (value >= TASK_SIZE_OF(child)) 263 return -EIO; 264 child->thread.gs = value; 265 return 0; 266 case offsetof(struct user_regs_struct, eflags): 267 value &= FLAG_MASK; 268 tmp = get_stack_long(child, EFL_OFFSET); 269 tmp &= ~FLAG_MASK; 270 value |= tmp; 271 break; 272 case offsetof(struct user_regs_struct,cs): 273 if ((value & 3) != 3) 274 return -EIO; 275 value &= 0xffff; 276 break; 277 } 278 put_stack_long(child, regno - sizeof(struct pt_regs), value); 279 return 0; 280} 281 282static unsigned long getreg(struct task_struct *child, unsigned long regno) 283{ 284 unsigned long val; 285 switch (regno) { 286 case offsetof(struct user_regs_struct, fs): 287 return child->thread.fsindex; 288 case offsetof(struct user_regs_struct, gs): 289 return child->thread.gsindex; 290 case offsetof(struct user_regs_struct, ds): 291 return child->thread.ds; 292 case offsetof(struct user_regs_struct, es): 293 return child->thread.es; 294 case offsetof(struct user_regs_struct, fs_base): 295 return child->thread.fs; 296 case offsetof(struct user_regs_struct, gs_base): 297 return child->thread.gs; 298 default: 299 regno = regno - sizeof(struct pt_regs); 300 val = get_stack_long(child, regno); 301 if (test_tsk_thread_flag(child, TIF_IA32)) 302 val &= 0xffffffff; 303 return val; 304 } 305 306} 307 308long arch_ptrace(struct task_struct *child, long request, long addr, long data) 309{ 310 long i, ret; 311 unsigned ui; 312 313 switch (request) { 314 /* when I and D space are separate, these will need to be fixed. */ 315 case PTRACE_PEEKTEXT: /* read word at location addr. */ 316 case PTRACE_PEEKDATA: { 317 unsigned long tmp; 318 int copied; 319 320 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); 321 ret = -EIO; 322 if (copied != sizeof(tmp)) 323 break; 324 ret = put_user(tmp,(unsigned long __user *) data); 325 break; 326 } 327 328 /* read the word at location addr in the USER area. */ 329 case PTRACE_PEEKUSR: { 330 unsigned long tmp; 331 332 ret = -EIO; 333 if ((addr & 7) || 334 addr > sizeof(struct user) - 7) 335 break; 336 337 switch (addr) { 338 case 0 ... sizeof(struct user_regs_struct) - sizeof(long): 339 tmp = getreg(child, addr); 340 break; 341 case offsetof(struct user, u_debugreg[0]): 342 tmp = child->thread.debugreg0; 343 break; 344 case offsetof(struct user, u_debugreg[1]): 345 tmp = child->thread.debugreg1; 346 break; 347 case offsetof(struct user, u_debugreg[2]): 348 tmp = child->thread.debugreg2; 349 break; 350 case offsetof(struct user, u_debugreg[3]): 351 tmp = child->thread.debugreg3; 352 break; 353 case offsetof(struct user, u_debugreg[6]): 354 tmp = child->thread.debugreg6; 355 break; 356 case offsetof(struct user, u_debugreg[7]): 357 tmp = child->thread.debugreg7; 358 break; 359 default: 360 tmp = 0; 361 break; 362 } 363 ret = put_user(tmp,(unsigned long __user *) data); 364 break; 365 } 366 367 /* when I and D space are separate, this will have to be fixed. */ 368 case PTRACE_POKETEXT: /* write the word at location addr. */ 369 case PTRACE_POKEDATA: 370 ret = 0; 371 if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) 372 break; 373 ret = -EIO; 374 break; 375 376 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ 377 { 378 int dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7; 379 ret = -EIO; 380 if ((addr & 7) || 381 addr > sizeof(struct user) - 7) 382 break; 383 384 switch (addr) { 385 case 0 ... sizeof(struct user_regs_struct) - sizeof(long): 386 ret = putreg(child, addr, data); 387 break; 388 /* Disallows to set a breakpoint into the vsyscall */ 389 case offsetof(struct user, u_debugreg[0]): 390 if (data >= TASK_SIZE_OF(child) - dsize) break; 391 child->thread.debugreg0 = data; 392 ret = 0; 393 break; 394 case offsetof(struct user, u_debugreg[1]): 395 if (data >= TASK_SIZE_OF(child) - dsize) break; 396 child->thread.debugreg1 = data; 397 ret = 0; 398 break; 399 case offsetof(struct user, u_debugreg[2]): 400 if (data >= TASK_SIZE_OF(child) - dsize) break; 401 child->thread.debugreg2 = data; 402 ret = 0; 403 break; 404 case offsetof(struct user, u_debugreg[3]): 405 if (data >= TASK_SIZE_OF(child) - dsize) break; 406 child->thread.debugreg3 = data; 407 ret = 0; 408 break; 409 case offsetof(struct user, u_debugreg[6]): 410 if (data >> 32) 411 break; 412 child->thread.debugreg6 = data; 413 ret = 0; 414 break; 415 case offsetof(struct user, u_debugreg[7]): 416 /* See arch/i386/kernel/ptrace.c for an explanation of 417 * this awkward check.*/ 418 data &= ~DR_CONTROL_RESERVED; 419 for(i=0; i<4; i++) 420 if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) 421 break; 422 if (i == 4) { 423 child->thread.debugreg7 = data; 424 if (data) 425 set_tsk_thread_flag(child, TIF_DEBUG); 426 else 427 clear_tsk_thread_flag(child, TIF_DEBUG); 428 ret = 0; 429 } 430 break; 431 } 432 break; 433 } 434 case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ 435 case PTRACE_CONT: /* restart after signal. */ 436 437 ret = -EIO; 438 if (!valid_signal(data)) 439 break; 440 if (request == PTRACE_SYSCALL) 441 set_tsk_thread_flag(child,TIF_SYSCALL_TRACE); 442 else 443 clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); 444 clear_tsk_thread_flag(child, TIF_SINGLESTEP); 445 child->exit_code = data; 446 /* make sure the single step bit is not set. */ 447 clear_singlestep(child); 448 wake_up_process(child); 449 ret = 0; 450 break; 451 452#ifdef CONFIG_IA32_EMULATION 453 /* This makes only sense with 32bit programs. Allow a 454 64bit debugger to fully examine them too. Better 455 don't use it against 64bit processes, use 456 PTRACE_ARCH_PRCTL instead. */ 457 case PTRACE_SET_THREAD_AREA: { 458 struct user_desc __user *p; 459 int old; 460 p = (struct user_desc __user *)data; 461 get_user(old, &p->entry_number); 462 put_user(addr, &p->entry_number); 463 ret = do_set_thread_area(&child->thread, p); 464 put_user(old, &p->entry_number); 465 break; 466 case PTRACE_GET_THREAD_AREA: 467 p = (struct user_desc __user *)data; 468 get_user(old, &p->entry_number); 469 put_user(addr, &p->entry_number); 470 ret = do_get_thread_area(&child->thread, p); 471 put_user(old, &p->entry_number); 472 break; 473 } 474#endif 475 /* normal 64bit interface to access TLS data. 476 Works just like arch_prctl, except that the arguments 477 are reversed. */ 478 case PTRACE_ARCH_PRCTL: 479 ret = do_arch_prctl(child, data, addr); 480 break; 481 482/* 483 * make the child exit. Best I can do is send it a sigkill. 484 * perhaps it should be put in the status that it wants to 485 * exit. 486 */ 487 case PTRACE_KILL: 488 ret = 0; 489 if (child->exit_state == EXIT_ZOMBIE) /* already dead */ 490 break; 491 clear_tsk_thread_flag(child, TIF_SINGLESTEP); 492 child->exit_code = SIGKILL; 493 /* make sure the single step bit is not set. */ 494 clear_singlestep(child); 495 wake_up_process(child); 496 break; 497 498 case PTRACE_SINGLESTEP: /* set the trap flag. */ 499 ret = -EIO; 500 if (!valid_signal(data)) 501 break; 502 clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE); 503 set_singlestep(child); 504 child->exit_code = data; 505 /* give it a chance to run. */ 506 wake_up_process(child); 507 ret = 0; 508 break; 509 510 case PTRACE_DETACH: 511 /* detach a process that was attached. */ 512 ret = ptrace_detach(child, data); 513 break; 514 515 case PTRACE_GETREGS: { /* Get all gp regs from the child. */ 516 if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, 517 sizeof(struct user_regs_struct))) { 518 ret = -EIO; 519 break; 520 } 521 ret = 0; 522 for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { 523 ret |= __put_user(getreg(child, ui),(unsigned long __user *) data); 524 data += sizeof(long); 525 } 526 break; 527 } 528 529 case PTRACE_SETREGS: { /* Set all gp regs in the child. */ 530 unsigned long tmp; 531 if (!access_ok(VERIFY_READ, (unsigned __user *)data, 532 sizeof(struct user_regs_struct))) { 533 ret = -EIO; 534 break; 535 } 536 ret = 0; 537 for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { 538 ret = __get_user(tmp, (unsigned long __user *) data); 539 if (ret) 540 break; 541 ret = putreg(child, ui, tmp); 542 if (ret) 543 break; 544 data += sizeof(long); 545 } 546 break; 547 } 548 549 case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */ 550 if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, 551 sizeof(struct user_i387_struct))) { 552 ret = -EIO; 553 break; 554 } 555 ret = get_fpregs((struct user_i387_struct __user *)data, child); 556 break; 557 } 558 559 case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */ 560 if (!access_ok(VERIFY_READ, (unsigned __user *)data, 561 sizeof(struct user_i387_struct))) { 562 ret = -EIO; 563 break; 564 } 565 set_stopped_child_used_math(child); 566 ret = set_fpregs(child, (struct user_i387_struct __user *)data); 567 break; 568 } 569 570 default: 571 ret = ptrace_request(child, request, addr, data); 572 break; 573 } 574 return ret; 575} 576 577static void syscall_trace(struct pt_regs *regs) 578{ 579 580 581 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) 582 ? 0x80 : 0)); 583 /* 584 * this isn't the same as continuing with a signal, but it will do 585 * for normal use. strace only continues with a signal if the 586 * stopping signal is not SIGTRAP. -brl 587 */ 588 if (current->exit_code) { 589 send_sig(current->exit_code, current, 1); 590 current->exit_code = 0; 591 } 592} 593 594asmlinkage void syscall_trace_enter(struct pt_regs *regs) 595{ 596 /* do the secure computing check first */ 597 secure_computing(regs->orig_rax); 598 599 if (test_thread_flag(TIF_SYSCALL_TRACE) 600 && (current->ptrace & PT_PTRACED)) 601 syscall_trace(regs); 602 603 if (unlikely(current->audit_context)) { 604 if (test_thread_flag(TIF_IA32)) { 605 audit_syscall_entry(AUDIT_ARCH_I386, 606 regs->orig_rax, 607 regs->rbx, regs->rcx, 608 regs->rdx, regs->rsi); 609 } else { 610 audit_syscall_entry(AUDIT_ARCH_X86_64, 611 regs->orig_rax, 612 regs->rdi, regs->rsi, 613 regs->rdx, regs->r10); 614 } 615 } 616} 617 618asmlinkage void syscall_trace_leave(struct pt_regs *regs) 619{ 620 if (unlikely(current->audit_context)) 621 audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax); 622 623 if ((test_thread_flag(TIF_SYSCALL_TRACE) 624 || test_thread_flag(TIF_SINGLESTEP)) 625 && (current->ptrace & PT_PTRACED)) 626 syscall_trace(regs); 627} 628