1/* 2 * Derived from "arch/i386/kernel/process.c" 3 * Copyright (C) 1995 Linus Torvalds 4 * 5 * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and 6 * Paul Mackerras (paulus@cs.anu.edu.au) 7 * 8 * PowerPC version 9 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License 13 * as published by the Free Software Foundation; either version 14 * 2 of the License, or (at your option) any later version. 15 */ 16 17#include <linux/errno.h> 18#include <linux/sched.h> 19#include <linux/kernel.h> 20#include <linux/mm.h> 21#include <linux/smp.h> 22#include <linux/stddef.h> 23#include <linux/unistd.h> 24#include <linux/ptrace.h> 25#include <linux/slab.h> 26#include <linux/user.h> 27#include <linux/elf.h> 28#include <linux/init.h> 29#include <linux/prctl.h> 30#include <linux/init_task.h> 31#include <linux/module.h> 32#include <linux/kallsyms.h> 33#include <linux/mqueue.h> 34#include <linux/hardirq.h> 35#include <linux/utsname.h> 36#include <linux/ftrace.h> 37#include <linux/kernel_stat.h> 38#include <linux/personality.h> 39#include <linux/random.h> 40#include <linux/hw_breakpoint.h> 41 42#include <asm/pgtable.h> 43#include <asm/uaccess.h> 44#include <asm/system.h> 45#include <asm/io.h> 46#include <asm/processor.h> 47#include <asm/mmu.h> 48#include <asm/prom.h> 49#include <asm/machdep.h> 50#include <asm/time.h> 51#include <asm/syscalls.h> 52#ifdef CONFIG_PPC64 53#include <asm/firmware.h> 54#endif 55#include <linux/kprobes.h> 56#include <linux/kdebug.h> 57 58extern unsigned long _get_SP(void); 59 60#ifndef CONFIG_SMP 61struct task_struct *last_task_used_math = NULL; 62struct task_struct *last_task_used_altivec = NULL; 63struct task_struct *last_task_used_vsx = NULL; 64struct task_struct *last_task_used_spe = NULL; 65#endif 66 67/* 68 * Make sure the floating-point register state in the 69 * the thread_struct is up to date for task tsk. 70 */ 71void flush_fp_to_thread(struct task_struct *tsk) 72{ 73 if (tsk->thread.regs) { 74 /* 75 * We need to disable preemption here because if we didn't, 76 * another process could get scheduled after the regs->msr 77 * test but before we have finished saving the FP registers 78 * to the thread_struct. That process could take over the 79 * FPU, and then when we get scheduled again we would store 80 * bogus values for the remaining FP registers. 81 */ 82 preempt_disable(); 83 if (tsk->thread.regs->msr & MSR_FP) { 84#ifdef CONFIG_SMP 85 /* 86 * This should only ever be called for current or 87 * for a stopped child process. Since we save away 88 * the FP register state on context switch on SMP, 89 * there is something wrong if a stopped child appears 90 * to still have its FP state in the CPU registers. 91 */ 92 BUG_ON(tsk != current); 93#endif 94 giveup_fpu(tsk); 95 } 96 preempt_enable(); 97 } 98} 99 100void enable_kernel_fp(void) 101{ 102 WARN_ON(preemptible()); 103 104#ifdef CONFIG_SMP 105 if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) 106 giveup_fpu(current); 107 else 108 giveup_fpu(NULL); /* just enables FP for kernel */ 109#else 110 giveup_fpu(last_task_used_math); 111#endif /* CONFIG_SMP */ 112} 113EXPORT_SYMBOL(enable_kernel_fp); 114 115#ifdef CONFIG_ALTIVEC 116void enable_kernel_altivec(void) 117{ 118 WARN_ON(preemptible()); 119 120#ifdef CONFIG_SMP 121 if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) 122 giveup_altivec(current); 123 else 124 giveup_altivec(NULL); /* just enable AltiVec for kernel - force */ 125#else 126 giveup_altivec(last_task_used_altivec); 127#endif /* CONFIG_SMP */ 128} 129EXPORT_SYMBOL(enable_kernel_altivec); 130 131/* 132 * Make sure the VMX/Altivec register state in the 133 * the thread_struct is up to date for task tsk. 134 */ 135void flush_altivec_to_thread(struct task_struct *tsk) 136{ 137 if (tsk->thread.regs) { 138 preempt_disable(); 139 if (tsk->thread.regs->msr & MSR_VEC) { 140#ifdef CONFIG_SMP 141 BUG_ON(tsk != current); 142#endif 143 giveup_altivec(tsk); 144 } 145 preempt_enable(); 146 } 147} 148#endif /* CONFIG_ALTIVEC */ 149 150#ifdef CONFIG_VSX 151 152void giveup_vsx(struct task_struct *tsk) 153{ 154 giveup_fpu(tsk); 155 giveup_altivec(tsk); 156 __giveup_vsx(tsk); 157} 158 159void flush_vsx_to_thread(struct task_struct *tsk) 160{ 161 if (tsk->thread.regs) { 162 preempt_disable(); 163 if (tsk->thread.regs->msr & MSR_VSX) { 164#ifdef CONFIG_SMP 165 BUG_ON(tsk != current); 166#endif 167 giveup_vsx(tsk); 168 } 169 preempt_enable(); 170 } 171} 172#endif /* CONFIG_VSX */ 173 174#ifdef CONFIG_SPE 175 176void enable_kernel_spe(void) 177{ 178 WARN_ON(preemptible()); 179 180#ifdef CONFIG_SMP 181 if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) 182 giveup_spe(current); 183 else 184 giveup_spe(NULL); /* just enable SPE for kernel - force */ 185#else 186 giveup_spe(last_task_used_spe); 187#endif /* __SMP __ */ 188} 189EXPORT_SYMBOL(enable_kernel_spe); 190 191void flush_spe_to_thread(struct task_struct *tsk) 192{ 193 if (tsk->thread.regs) { 194 preempt_disable(); 195 if (tsk->thread.regs->msr & MSR_SPE) { 196#ifdef CONFIG_SMP 197 BUG_ON(tsk != current); 198#endif 199 giveup_spe(tsk); 200 } 201 preempt_enable(); 202 } 203} 204#endif /* CONFIG_SPE */ 205 206#ifndef CONFIG_SMP 207/* 208 * If we are doing lazy switching of CPU state (FP, altivec or SPE), 209 * and the current task has some state, discard it. 210 */ 211void discard_lazy_cpu_state(void) 212{ 213 preempt_disable(); 214 if (last_task_used_math == current) 215 last_task_used_math = NULL; 216#ifdef CONFIG_ALTIVEC 217 if (last_task_used_altivec == current) 218 last_task_used_altivec = NULL; 219#endif /* CONFIG_ALTIVEC */ 220#ifdef CONFIG_VSX 221 if (last_task_used_vsx == current) 222 last_task_used_vsx = NULL; 223#endif /* CONFIG_VSX */ 224#ifdef CONFIG_SPE 225 if (last_task_used_spe == current) 226 last_task_used_spe = NULL; 227#endif 228 preempt_enable(); 229} 230#endif /* CONFIG_SMP */ 231 232#ifdef CONFIG_PPC_ADV_DEBUG_REGS 233void do_send_trap(struct pt_regs *regs, unsigned long address, 234 unsigned long error_code, int signal_code, int breakpt) 235{ 236 siginfo_t info; 237 238 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, 239 11, SIGSEGV) == NOTIFY_STOP) 240 return; 241 242 /* Deliver the signal to userspace */ 243 info.si_signo = SIGTRAP; 244 info.si_errno = breakpt; /* breakpoint or watchpoint id */ 245 info.si_code = signal_code; 246 info.si_addr = (void __user *)address; 247 force_sig_info(SIGTRAP, &info, current); 248} 249#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ 250void do_dabr(struct pt_regs *regs, unsigned long address, 251 unsigned long error_code) 252{ 253 siginfo_t info; 254 255 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, 256 11, SIGSEGV) == NOTIFY_STOP) 257 return; 258 259 if (debugger_dabr_match(regs)) 260 return; 261 262 /* Clear the DABR */ 263 set_dabr(0); 264 265 /* Deliver the signal to userspace */ 266 info.si_signo = SIGTRAP; 267 info.si_errno = 0; 268 info.si_code = TRAP_HWBKPT; 269 info.si_addr = (void __user *)address; 270 force_sig_info(SIGTRAP, &info, current); 271} 272#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ 273 274static DEFINE_PER_CPU(unsigned long, current_dabr); 275 276#ifdef CONFIG_PPC_ADV_DEBUG_REGS 277/* 278 * Set the debug registers back to their default "safe" values. 279 */ 280static void set_debug_reg_defaults(struct thread_struct *thread) 281{ 282 thread->iac1 = thread->iac2 = 0; 283#if CONFIG_PPC_ADV_DEBUG_IACS > 2 284 thread->iac3 = thread->iac4 = 0; 285#endif 286 thread->dac1 = thread->dac2 = 0; 287#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 288 thread->dvc1 = thread->dvc2 = 0; 289#endif 290 thread->dbcr0 = 0; 291#ifdef CONFIG_BOOKE 292 /* 293 * Force User/Supervisor bits to b11 (user-only MSR[PR]=1) 294 */ 295 thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \ 296 DBCR1_IAC3US | DBCR1_IAC4US; 297 /* 298 * Force Data Address Compare User/Supervisor bits to be User-only 299 * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0. 300 */ 301 thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; 302#else 303 thread->dbcr1 = 0; 304#endif 305} 306 307static void prime_debug_regs(struct thread_struct *thread) 308{ 309 mtspr(SPRN_IAC1, thread->iac1); 310 mtspr(SPRN_IAC2, thread->iac2); 311#if CONFIG_PPC_ADV_DEBUG_IACS > 2 312 mtspr(SPRN_IAC3, thread->iac3); 313 mtspr(SPRN_IAC4, thread->iac4); 314#endif 315 mtspr(SPRN_DAC1, thread->dac1); 316 mtspr(SPRN_DAC2, thread->dac2); 317#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 318 mtspr(SPRN_DVC1, thread->dvc1); 319 mtspr(SPRN_DVC2, thread->dvc2); 320#endif 321 mtspr(SPRN_DBCR0, thread->dbcr0); 322 mtspr(SPRN_DBCR1, thread->dbcr1); 323#ifdef CONFIG_BOOKE 324 mtspr(SPRN_DBCR2, thread->dbcr2); 325#endif 326} 327/* 328 * Unless neither the old or new thread are making use of the 329 * debug registers, set the debug registers from the values 330 * stored in the new thread. 331 */ 332static void switch_booke_debug_regs(struct thread_struct *new_thread) 333{ 334 if ((current->thread.dbcr0 & DBCR0_IDM) 335 || (new_thread->dbcr0 & DBCR0_IDM)) 336 prime_debug_regs(new_thread); 337} 338#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ 339static void set_debug_reg_defaults(struct thread_struct *thread) 340{ 341 if (thread->dabr) { 342 thread->dabr = 0; 343 set_dabr(0); 344 } 345} 346#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ 347 348int set_dabr(unsigned long dabr) 349{ 350 __get_cpu_var(current_dabr) = dabr; 351 352 if (ppc_md.set_dabr) 353 return ppc_md.set_dabr(dabr); 354 355#ifdef CONFIG_PPC_ADV_DEBUG_REGS 356 mtspr(SPRN_DAC1, dabr); 357#ifdef CONFIG_PPC_47x 358 isync(); 359#endif 360#elif defined(CONFIG_PPC_BOOK3S) 361 mtspr(SPRN_DABR, dabr); 362#endif 363 364 365 return 0; 366} 367 368#ifdef CONFIG_PPC64 369DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); 370#endif 371 372struct task_struct *__switch_to(struct task_struct *prev, 373 struct task_struct *new) 374{ 375 struct thread_struct *new_thread, *old_thread; 376 unsigned long flags; 377 struct task_struct *last; 378 379#ifdef CONFIG_SMP 380 /* avoid complexity of lazy save/restore of fpu 381 * by just saving it every time we switch out if 382 * this task used the fpu during the last quantum. 383 * 384 * If it tries to use the fpu again, it'll trap and 385 * reload its fp regs. So we don't have to do a restore 386 * every switch, just a save. 387 * -- Cort 388 */ 389 if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP)) 390 giveup_fpu(prev); 391#ifdef CONFIG_ALTIVEC 392 /* 393 * If the previous thread used altivec in the last quantum 394 * (thus changing altivec regs) then save them. 395 * We used to check the VRSAVE register but not all apps 396 * set it, so we don't rely on it now (and in fact we need 397 * to save & restore VSCR even if VRSAVE == 0). -- paulus 398 * 399 * On SMP we always save/restore altivec regs just to avoid the 400 * complexity of changing processors. 401 * -- Cort 402 */ 403 if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) 404 giveup_altivec(prev); 405#endif /* CONFIG_ALTIVEC */ 406#ifdef CONFIG_VSX 407 if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX)) 408 /* VMX and FPU registers are already save here */ 409 __giveup_vsx(prev); 410#endif /* CONFIG_VSX */ 411#ifdef CONFIG_SPE 412 /* 413 * If the previous thread used spe in the last quantum 414 * (thus changing spe regs) then save them. 415 * 416 * On SMP we always save/restore spe regs just to avoid the 417 * complexity of changing processors. 418 */ 419 if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE))) 420 giveup_spe(prev); 421#endif /* CONFIG_SPE */ 422 423#else /* CONFIG_SMP */ 424#ifdef CONFIG_ALTIVEC 425 /* Avoid the trap. On smp this this never happens since 426 * we don't set last_task_used_altivec -- Cort 427 */ 428 if (new->thread.regs && last_task_used_altivec == new) 429 new->thread.regs->msr |= MSR_VEC; 430#endif /* CONFIG_ALTIVEC */ 431#ifdef CONFIG_VSX 432 if (new->thread.regs && last_task_used_vsx == new) 433 new->thread.regs->msr |= MSR_VSX; 434#endif /* CONFIG_VSX */ 435#ifdef CONFIG_SPE 436 /* Avoid the trap. On smp this this never happens since 437 * we don't set last_task_used_spe 438 */ 439 if (new->thread.regs && last_task_used_spe == new) 440 new->thread.regs->msr |= MSR_SPE; 441#endif /* CONFIG_SPE */ 442 443#endif /* CONFIG_SMP */ 444 445#ifdef CONFIG_PPC_ADV_DEBUG_REGS 446 switch_booke_debug_regs(&new->thread); 447#else 448/* 449 * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would 450 * schedule DABR 451 */ 452#ifndef CONFIG_HAVE_HW_BREAKPOINT 453 if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) 454 set_dabr(new->thread.dabr); 455#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 456#endif 457 458 459 new_thread = &new->thread; 460 old_thread = ¤t->thread; 461 462#if defined(CONFIG_PPC_BOOK3E_64) 463 if ((new_thread->dbcr0 | old_thread->dbcr0) & DBCR0_IDM) { 464 u32 dbcr0; 465 466 mtmsr(mfmsr() & ~MSR_DE); 467 isync(); 468 dbcr0 = mfspr(SPRN_DBCR0); 469 dbcr0 = (dbcr0 & DBCR0_EDM) | new_thread->dbcr0; 470 mtspr(SPRN_DBCR0, dbcr0); 471 } 472#endif /* CONFIG_PPC64_BOOK3E */ 473 474#ifdef CONFIG_PPC64 475 /* 476 * Collect processor utilization data per process 477 */ 478 if (firmware_has_feature(FW_FEATURE_SPLPAR)) { 479 struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); 480 long unsigned start_tb, current_tb; 481 start_tb = old_thread->start_tb; 482 cu->current_tb = current_tb = mfspr(SPRN_PURR); 483 old_thread->accum_tb += (current_tb - start_tb); 484 new_thread->start_tb = current_tb; 485 } 486#endif 487 488 local_irq_save(flags); 489 490 account_system_vtime(current); 491 account_process_vtime(current); 492 calculate_steal_time(); 493 494 /* 495 * We can't take a PMU exception inside _switch() since there is a 496 * window where the kernel stack SLB and the kernel stack are out 497 * of sync. Hard disable here. 498 */ 499 hard_irq_disable(); 500 last = _switch(old_thread, new_thread); 501 502 local_irq_restore(flags); 503 504 return last; 505} 506 507static int instructions_to_print = 16; 508 509static void show_instructions(struct pt_regs *regs) 510{ 511 int i; 512 unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 * 513 sizeof(int)); 514 515 printk("Instruction dump:"); 516 517 for (i = 0; i < instructions_to_print; i++) { 518 int instr; 519 520 if (!(i % 8)) 521 printk("\n"); 522 523#if !defined(CONFIG_BOOKE) 524 /* If executing with the IMMU off, adjust pc rather 525 * than print XXXXXXXX. 526 */ 527 if (!(regs->msr & MSR_IR)) 528 pc = (unsigned long)phys_to_virt(pc); 529#endif 530 531 /* We use __get_user here *only* to avoid an OOPS on a 532 * bad address because the pc *should* only be a 533 * kernel address. 534 */ 535 if (!__kernel_text_address(pc) || 536 __get_user(instr, (unsigned int __user *)pc)) { 537 printk("XXXXXXXX "); 538 } else { 539 if (regs->nip == pc) 540 printk("<%08x> ", instr); 541 else 542 printk("%08x ", instr); 543 } 544 545 pc += sizeof(int); 546 } 547 548 printk("\n"); 549} 550 551static struct regbit { 552 unsigned long bit; 553 const char *name; 554} msr_bits[] = { 555 {MSR_EE, "EE"}, 556 {MSR_PR, "PR"}, 557 {MSR_FP, "FP"}, 558 {MSR_VEC, "VEC"}, 559 {MSR_VSX, "VSX"}, 560 {MSR_ME, "ME"}, 561 {MSR_CE, "CE"}, 562 {MSR_DE, "DE"}, 563 {MSR_IR, "IR"}, 564 {MSR_DR, "DR"}, 565 {0, NULL} 566}; 567 568static void printbits(unsigned long val, struct regbit *bits) 569{ 570 const char *sep = ""; 571 572 printk("<"); 573 for (; bits->bit; ++bits) 574 if (val & bits->bit) { 575 printk("%s%s", sep, bits->name); 576 sep = ","; 577 } 578 printk(">"); 579} 580 581#ifdef CONFIG_PPC64 582#define REG "%016lx" 583#define REGS_PER_LINE 4 584#define LAST_VOLATILE 13 585#else 586#define REG "%08lx" 587#define REGS_PER_LINE 8 588#define LAST_VOLATILE 12 589#endif 590 591void show_regs(struct pt_regs * regs) 592{ 593 int i, trap; 594 595 printk("NIP: "REG" LR: "REG" CTR: "REG"\n", 596 regs->nip, regs->link, regs->ctr); 597 printk("REGS: %p TRAP: %04lx %s (%s)\n", 598 regs, regs->trap, print_tainted(), init_utsname()->release); 599 printk("MSR: "REG" ", regs->msr); 600 printbits(regs->msr, msr_bits); 601 printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); 602 trap = TRAP(regs); 603 if (trap == 0x300 || trap == 0x600) 604#ifdef CONFIG_PPC_ADV_DEBUG_REGS 605 printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); 606#else 607 printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); 608#endif 609 printk("TASK = %p[%d] '%s' THREAD: %p", 610 current, task_pid_nr(current), current->comm, task_thread_info(current)); 611 612#ifdef CONFIG_SMP 613 printk(" CPU: %d", raw_smp_processor_id()); 614#endif /* CONFIG_SMP */ 615 616 for (i = 0; i < 32; i++) { 617 if ((i % REGS_PER_LINE) == 0) 618 printk("\nGPR%02d: ", i); 619 printk(REG " ", regs->gpr[i]); 620 if (i == LAST_VOLATILE && !FULL_REGS(regs)) 621 break; 622 } 623 printk("\n"); 624#ifdef CONFIG_KALLSYMS 625 /* 626 * Lookup NIP late so we have the best change of getting the 627 * above info out without failing 628 */ 629 printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); 630 printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); 631#endif 632 show_stack(current, (unsigned long *) regs->gpr[1]); 633 if (!user_mode(regs)) 634 show_instructions(regs); 635} 636 637void exit_thread(void) 638{ 639 discard_lazy_cpu_state(); 640} 641 642void flush_thread(void) 643{ 644 discard_lazy_cpu_state(); 645 646#ifdef CONFIG_HAVE_HW_BREAKPOINTS 647 flush_ptrace_hw_breakpoint(current); 648#else /* CONFIG_HAVE_HW_BREAKPOINTS */ 649 set_debug_reg_defaults(¤t->thread); 650#endif /* CONFIG_HAVE_HW_BREAKPOINTS */ 651} 652 653void 654release_thread(struct task_struct *t) 655{ 656} 657 658/* 659 * This gets called before we allocate a new thread and copy 660 * the current task into it. 661 */ 662void prepare_to_copy(struct task_struct *tsk) 663{ 664 flush_fp_to_thread(current); 665 flush_altivec_to_thread(current); 666 flush_vsx_to_thread(current); 667 flush_spe_to_thread(current); 668#ifdef CONFIG_HAVE_HW_BREAKPOINT 669 flush_ptrace_hw_breakpoint(tsk); 670#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 671} 672 673/* 674 * Copy a thread.. 675 */ 676int copy_thread(unsigned long clone_flags, unsigned long usp, 677 unsigned long unused, struct task_struct *p, 678 struct pt_regs *regs) 679{ 680 struct pt_regs *childregs, *kregs; 681 extern void ret_from_fork(void); 682 unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; 683 684 CHECK_FULL_REGS(regs); 685 /* Copy registers */ 686 sp -= sizeof(struct pt_regs); 687 childregs = (struct pt_regs *) sp; 688 *childregs = *regs; 689 if ((childregs->msr & MSR_PR) == 0) { 690 /* for kernel thread, set `current' and stackptr in new task */ 691 childregs->gpr[1] = sp + sizeof(struct pt_regs); 692#ifdef CONFIG_PPC32 693 childregs->gpr[2] = (unsigned long) p; 694#else 695 clear_tsk_thread_flag(p, TIF_32BIT); 696#endif 697 p->thread.regs = NULL; /* no user register state */ 698 } else { 699 childregs->gpr[1] = usp; 700 p->thread.regs = childregs; 701 if (clone_flags & CLONE_SETTLS) { 702#ifdef CONFIG_PPC64 703 if (!is_32bit_task()) 704 childregs->gpr[13] = childregs->gpr[6]; 705 else 706#endif 707 childregs->gpr[2] = childregs->gpr[6]; 708 } 709 } 710 childregs->gpr[3] = 0; /* Result from fork() */ 711 sp -= STACK_FRAME_OVERHEAD; 712 713 /* 714 * The way this works is that at some point in the future 715 * some task will call _switch to switch to the new task. 716 * That will pop off the stack frame created below and start 717 * the new task running at ret_from_fork. The new task will 718 * do some house keeping and then return from the fork or clone 719 * system call, using the stack frame created above. 720 */ 721 sp -= sizeof(struct pt_regs); 722 kregs = (struct pt_regs *) sp; 723 sp -= STACK_FRAME_OVERHEAD; 724 p->thread.ksp = sp; 725 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + 726 _ALIGN_UP(sizeof(struct thread_info), 16); 727 728#ifdef CONFIG_PPC_STD_MMU_64 729 if (cpu_has_feature(CPU_FTR_SLB)) { 730 unsigned long sp_vsid; 731 unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; 732 733 if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) 734 sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) 735 << SLB_VSID_SHIFT_1T; 736 else 737 sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M) 738 << SLB_VSID_SHIFT; 739 sp_vsid |= SLB_VSID_KERNEL | llp; 740 p->thread.ksp_vsid = sp_vsid; 741 } 742#endif /* CONFIG_PPC_STD_MMU_64 */ 743 744 /* 745 * The PPC64 ABI makes use of a TOC to contain function 746 * pointers. The function (ret_from_except) is actually a pointer 747 * to the TOC entry. The first entry is a pointer to the actual 748 * function. 749 */ 750#ifdef CONFIG_PPC64 751 kregs->nip = *((unsigned long *)ret_from_fork); 752#else 753 kregs->nip = (unsigned long)ret_from_fork; 754#endif 755 756 return 0; 757} 758 759/* 760 * Set up a thread for executing a new program 761 */ 762void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) 763{ 764#ifdef CONFIG_PPC64 765 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ 766#endif 767 768 set_fs(USER_DS); 769 770 /* 771 * If we exec out of a kernel thread then thread.regs will not be 772 * set. Do it now. 773 */ 774 if (!current->thread.regs) { 775 struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; 776 current->thread.regs = regs - 1; 777 } 778 779 memset(regs->gpr, 0, sizeof(regs->gpr)); 780 regs->ctr = 0; 781 regs->link = 0; 782 regs->xer = 0; 783 regs->ccr = 0; 784 regs->gpr[1] = sp; 785 786 /* 787 * We have just cleared all the nonvolatile GPRs, so make 788 * FULL_REGS(regs) return true. This is necessary to allow 789 * ptrace to examine the thread immediately after exec. 790 */ 791 regs->trap &= ~1UL; 792 793#ifdef CONFIG_PPC32 794 regs->mq = 0; 795 regs->nip = start; 796 regs->msr = MSR_USER; 797#else 798 if (!is_32bit_task()) { 799 unsigned long entry, toc; 800 801 /* start is a relocated pointer to the function descriptor for 802 * the elf _start routine. The first entry in the function 803 * descriptor is the entry address of _start and the second 804 * entry is the TOC value we need to use. 805 */ 806 __get_user(entry, (unsigned long __user *)start); 807 __get_user(toc, (unsigned long __user *)start+1); 808 809 /* Check whether the e_entry function descriptor entries 810 * need to be relocated before we can use them. 811 */ 812 if (load_addr != 0) { 813 entry += load_addr; 814 toc += load_addr; 815 } 816 regs->nip = entry; 817 regs->gpr[2] = toc; 818 regs->msr = MSR_USER64; 819 } else { 820 regs->nip = start; 821 regs->gpr[2] = 0; 822 regs->msr = MSR_USER32; 823 } 824#endif 825 826 discard_lazy_cpu_state(); 827#ifdef CONFIG_VSX 828 current->thread.used_vsr = 0; 829#endif 830 memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); 831 current->thread.fpscr.val = 0; 832#ifdef CONFIG_ALTIVEC 833 memset(current->thread.vr, 0, sizeof(current->thread.vr)); 834 memset(¤t->thread.vscr, 0, sizeof(current->thread.vscr)); 835 current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */ 836 current->thread.vrsave = 0; 837 current->thread.used_vr = 0; 838#endif /* CONFIG_ALTIVEC */ 839#ifdef CONFIG_SPE 840 memset(current->thread.evr, 0, sizeof(current->thread.evr)); 841 current->thread.acc = 0; 842 current->thread.spefscr = 0; 843 current->thread.used_spe = 0; 844#endif /* CONFIG_SPE */ 845} 846 847#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ 848 | PR_FP_EXC_RES | PR_FP_EXC_INV) 849 850int set_fpexc_mode(struct task_struct *tsk, unsigned int val) 851{ 852 struct pt_regs *regs = tsk->thread.regs; 853 854 /* This is a bit hairy. If we are an SPE enabled processor 855 * (have embedded fp) we store the IEEE exception enable flags in 856 * fpexc_mode. fpexc_mode is also used for setting FP exception 857 * mode (asyn, precise, disabled) for 'Classic' FP. */ 858 if (val & PR_FP_EXC_SW_ENABLE) { 859#ifdef CONFIG_SPE 860 if (cpu_has_feature(CPU_FTR_SPE)) { 861 tsk->thread.fpexc_mode = val & 862 (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); 863 return 0; 864 } else { 865 return -EINVAL; 866 } 867#else 868 return -EINVAL; 869#endif 870 } 871 872 /* on a CONFIG_SPE this does not hurt us. The bits that 873 * __pack_fe01 use do not overlap with bits used for 874 * PR_FP_EXC_SW_ENABLE. Additionally, the MSR[FE0,FE1] bits 875 * on CONFIG_SPE implementations are reserved so writing to 876 * them does not change anything */ 877 if (val > PR_FP_EXC_PRECISE) 878 return -EINVAL; 879 tsk->thread.fpexc_mode = __pack_fe01(val); 880 if (regs != NULL && (regs->msr & MSR_FP) != 0) 881 regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) 882 | tsk->thread.fpexc_mode; 883 return 0; 884} 885 886int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) 887{ 888 unsigned int val; 889 890 if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) 891#ifdef CONFIG_SPE 892 if (cpu_has_feature(CPU_FTR_SPE)) 893 val = tsk->thread.fpexc_mode; 894 else 895 return -EINVAL; 896#else 897 return -EINVAL; 898#endif 899 else 900 val = __unpack_fe01(tsk->thread.fpexc_mode); 901 return put_user(val, (unsigned int __user *) adr); 902} 903 904int set_endian(struct task_struct *tsk, unsigned int val) 905{ 906 struct pt_regs *regs = tsk->thread.regs; 907 908 if ((val == PR_ENDIAN_LITTLE && !cpu_has_feature(CPU_FTR_REAL_LE)) || 909 (val == PR_ENDIAN_PPC_LITTLE && !cpu_has_feature(CPU_FTR_PPC_LE))) 910 return -EINVAL; 911 912 if (regs == NULL) 913 return -EINVAL; 914 915 if (val == PR_ENDIAN_BIG) 916 regs->msr &= ~MSR_LE; 917 else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE) 918 regs->msr |= MSR_LE; 919 else 920 return -EINVAL; 921 922 return 0; 923} 924 925int get_endian(struct task_struct *tsk, unsigned long adr) 926{ 927 struct pt_regs *regs = tsk->thread.regs; 928 unsigned int val; 929 930 if (!cpu_has_feature(CPU_FTR_PPC_LE) && 931 !cpu_has_feature(CPU_FTR_REAL_LE)) 932 return -EINVAL; 933 934 if (regs == NULL) 935 return -EINVAL; 936 937 if (regs->msr & MSR_LE) { 938 if (cpu_has_feature(CPU_FTR_REAL_LE)) 939 val = PR_ENDIAN_LITTLE; 940 else 941 val = PR_ENDIAN_PPC_LITTLE; 942 } else 943 val = PR_ENDIAN_BIG; 944 945 return put_user(val, (unsigned int __user *)adr); 946} 947 948int set_unalign_ctl(struct task_struct *tsk, unsigned int val) 949{ 950 tsk->thread.align_ctl = val; 951 return 0; 952} 953 954int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) 955{ 956 return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr); 957} 958 959#define TRUNC_PTR(x) ((typeof(x))(((unsigned long)(x)) & 0xffffffff)) 960 961int sys_clone(unsigned long clone_flags, unsigned long usp, 962 int __user *parent_tidp, void __user *child_threadptr, 963 int __user *child_tidp, int p6, 964 struct pt_regs *regs) 965{ 966 CHECK_FULL_REGS(regs); 967 if (usp == 0) 968 usp = regs->gpr[1]; /* stack pointer for child */ 969#ifdef CONFIG_PPC64 970 if (is_32bit_task()) { 971 parent_tidp = TRUNC_PTR(parent_tidp); 972 child_tidp = TRUNC_PTR(child_tidp); 973 } 974#endif 975 return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp); 976} 977 978int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3, 979 unsigned long p4, unsigned long p5, unsigned long p6, 980 struct pt_regs *regs) 981{ 982 CHECK_FULL_REGS(regs); 983 return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL); 984} 985 986int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, 987 unsigned long p4, unsigned long p5, unsigned long p6, 988 struct pt_regs *regs) 989{ 990 CHECK_FULL_REGS(regs); 991 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], 992 regs, 0, NULL, NULL); 993} 994 995int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, 996 unsigned long a3, unsigned long a4, unsigned long a5, 997 struct pt_regs *regs) 998{ 999 int error; 1000 char *filename; 1001 1002 filename = getname((const char __user *) a0); 1003 error = PTR_ERR(filename); 1004 if (IS_ERR(filename)) 1005 goto out; 1006 flush_fp_to_thread(current); 1007 flush_altivec_to_thread(current); 1008 flush_spe_to_thread(current); 1009 error = do_execve(filename, 1010 (const char __user *const __user *) a1, 1011 (const char __user *const __user *) a2, regs); 1012 putname(filename); 1013out: 1014 return error; 1015} 1016 1017static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, 1018 unsigned long nbytes) 1019{ 1020 unsigned long stack_page; 1021 unsigned long cpu = task_cpu(p); 1022 1023 /* 1024 * Avoid crashing if the stack has overflowed and corrupted 1025 * task_cpu(p), which is in the thread_info struct. 1026 */ 1027 if (cpu < NR_CPUS && cpu_possible(cpu)) { 1028 stack_page = (unsigned long) hardirq_ctx[cpu]; 1029 if (sp >= stack_page + sizeof(struct thread_struct) 1030 && sp <= stack_page + THREAD_SIZE - nbytes) 1031 return 1; 1032 1033 stack_page = (unsigned long) softirq_ctx[cpu]; 1034 if (sp >= stack_page + sizeof(struct thread_struct) 1035 && sp <= stack_page + THREAD_SIZE - nbytes) 1036 return 1; 1037 } 1038 return 0; 1039} 1040 1041int validate_sp(unsigned long sp, struct task_struct *p, 1042 unsigned long nbytes) 1043{ 1044 unsigned long stack_page = (unsigned long)task_stack_page(p); 1045 1046 if (sp >= stack_page + sizeof(struct thread_struct) 1047 && sp <= stack_page + THREAD_SIZE - nbytes) 1048 return 1; 1049 1050 return valid_irq_stack(sp, p, nbytes); 1051} 1052 1053EXPORT_SYMBOL(validate_sp); 1054 1055unsigned long get_wchan(struct task_struct *p) 1056{ 1057 unsigned long ip, sp; 1058 int count = 0; 1059 1060 if (!p || p == current || p->state == TASK_RUNNING) 1061 return 0; 1062 1063 sp = p->thread.ksp; 1064 if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) 1065 return 0; 1066 1067 do { 1068 sp = *(unsigned long *)sp; 1069 if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) 1070 return 0; 1071 if (count > 0) { 1072 ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE]; 1073 if (!in_sched_functions(ip)) 1074 return ip; 1075 } 1076 } while (count++ < 16); 1077 return 0; 1078} 1079 1080static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; 1081 1082void show_stack(struct task_struct *tsk, unsigned long *stack) 1083{ 1084 unsigned long sp, ip, lr, newsp; 1085 int count = 0; 1086 int firstframe = 1; 1087#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1088 int curr_frame = current->curr_ret_stack; 1089 extern void return_to_handler(void); 1090 unsigned long rth = (unsigned long)return_to_handler; 1091 unsigned long mrth = -1; 1092#ifdef CONFIG_PPC64 1093 extern void mod_return_to_handler(void); 1094 rth = *(unsigned long *)rth; 1095 mrth = (unsigned long)mod_return_to_handler; 1096 mrth = *(unsigned long *)mrth; 1097#endif 1098#endif 1099 1100 sp = (unsigned long) stack; 1101 if (tsk == NULL) 1102 tsk = current; 1103 if (sp == 0) { 1104 if (tsk == current) 1105 asm("mr %0,1" : "=r" (sp)); 1106 else 1107 sp = tsk->thread.ksp; 1108 } 1109 1110 lr = 0; 1111 printk("Call Trace:\n"); 1112 do { 1113 if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) 1114 return; 1115 1116 stack = (unsigned long *) sp; 1117 newsp = stack[0]; 1118 ip = stack[STACK_FRAME_LR_SAVE]; 1119 if (!firstframe || ip != lr) { 1120 printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); 1121#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1122 if ((ip == rth || ip == mrth) && curr_frame >= 0) { 1123 printk(" (%pS)", 1124 (void *)current->ret_stack[curr_frame].ret); 1125 curr_frame--; 1126 } 1127#endif 1128 if (firstframe) 1129 printk(" (unreliable)"); 1130 printk("\n"); 1131 } 1132 firstframe = 0; 1133 1134 /* 1135 * See if this is an exception frame. 1136 * We look for the "regshere" marker in the current frame. 1137 */ 1138 if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) 1139 && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { 1140 struct pt_regs *regs = (struct pt_regs *) 1141 (sp + STACK_FRAME_OVERHEAD); 1142 lr = regs->link; 1143 printk("--- Exception: %lx at %pS\n LR = %pS\n", 1144 regs->trap, (void *)regs->nip, (void *)lr); 1145 firstframe = 1; 1146 } 1147 1148 sp = newsp; 1149 } while (count++ < kstack_depth_to_print); 1150} 1151 1152void dump_stack(void) 1153{ 1154 show_stack(current, NULL); 1155} 1156EXPORT_SYMBOL(dump_stack); 1157 1158#ifdef CONFIG_PPC64 1159void ppc64_runlatch_on(void) 1160{ 1161 unsigned long ctrl; 1162 1163 if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) { 1164 HMT_medium(); 1165 1166 ctrl = mfspr(SPRN_CTRLF); 1167 ctrl |= CTRL_RUNLATCH; 1168 mtspr(SPRN_CTRLT, ctrl); 1169 1170 set_thread_flag(TIF_RUNLATCH); 1171 } 1172} 1173 1174void __ppc64_runlatch_off(void) 1175{ 1176 unsigned long ctrl; 1177 1178 HMT_medium(); 1179 1180 clear_thread_flag(TIF_RUNLATCH); 1181 1182 ctrl = mfspr(SPRN_CTRLF); 1183 ctrl &= ~CTRL_RUNLATCH; 1184 mtspr(SPRN_CTRLT, ctrl); 1185} 1186#endif 1187 1188#if THREAD_SHIFT < PAGE_SHIFT 1189 1190static struct kmem_cache *thread_info_cache; 1191 1192struct thread_info *alloc_thread_info(struct task_struct *tsk) 1193{ 1194 struct thread_info *ti; 1195 1196 ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL); 1197 if (unlikely(ti == NULL)) 1198 return NULL; 1199#ifdef CONFIG_DEBUG_STACK_USAGE 1200 memset(ti, 0, THREAD_SIZE); 1201#endif 1202 return ti; 1203} 1204 1205void free_thread_info(struct thread_info *ti) 1206{ 1207 kmem_cache_free(thread_info_cache, ti); 1208} 1209 1210void thread_info_cache_init(void) 1211{ 1212 thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, 1213 THREAD_SIZE, 0, NULL); 1214 BUG_ON(thread_info_cache == NULL); 1215} 1216 1217#endif /* THREAD_SHIFT < PAGE_SHIFT */ 1218 1219unsigned long arch_align_stack(unsigned long sp) 1220{ 1221 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 1222 sp -= get_random_int() & ~PAGE_MASK; 1223 return sp & ~0xf; 1224} 1225 1226static inline unsigned long brk_rnd(void) 1227{ 1228 unsigned long rnd = 0; 1229 1230 /* 8MB for 32bit, 1GB for 64bit */ 1231 if (is_32bit_task()) 1232 rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); 1233 else 1234 rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); 1235 1236 return rnd << PAGE_SHIFT; 1237} 1238 1239unsigned long arch_randomize_brk(struct mm_struct *mm) 1240{ 1241 unsigned long base = mm->brk; 1242 unsigned long ret; 1243 1244#ifdef CONFIG_PPC_STD_MMU_64 1245 /* 1246 * If we are using 1TB segments and we are allowed to randomise 1247 * the heap, we can put it above 1TB so it is backed by a 1TB 1248 * segment. Otherwise the heap will be in the bottom 1TB 1249 * which always uses 256MB segments and this may result in a 1250 * performance penalty. 1251 */ 1252 if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) 1253 base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); 1254#endif 1255 1256 ret = PAGE_ALIGN(base + brk_rnd()); 1257 1258 if (ret < mm->brk) 1259 return mm->brk; 1260 1261 return ret; 1262} 1263 1264unsigned long randomize_et_dyn(unsigned long base) 1265{ 1266 unsigned long ret = PAGE_ALIGN(base + brk_rnd()); 1267 1268 if (ret < base) 1269 return base; 1270 1271 return ret; 1272} 1273 1274#ifdef CONFIG_SMP 1275int arch_sd_sibling_asym_packing(void) 1276{ 1277 if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { 1278 printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); 1279 return SD_ASYM_PACKING; 1280 } 1281 return 0; 1282} 1283#endif 1284