1/* 2 * Architecture specific (PPC64) functions for kexec based crash dumps. 3 * 4 * Copyright (C) 2005, IBM Corp. 5 * 6 * Created by: Haren Myneni 7 * 8 * This source code is licensed under the GNU General Public License, 9 * Version 2. See the file COPYING for more details. 10 * 11 */ 12 13#undef DEBUG 14 15#include <linux/kernel.h> 16#include <linux/smp.h> 17#include <linux/reboot.h> 18#include <linux/kexec.h> 19#include <linux/bootmem.h> 20#include <linux/crash_dump.h> 21#include <linux/delay.h> 22#include <linux/elf.h> 23#include <linux/elfcore.h> 24#include <linux/init.h> 25#include <linux/irq.h> 26#include <linux/types.h> 27#include <linux/memblock.h> 28 29#include <asm/processor.h> 30#include <asm/machdep.h> 31#include <asm/kexec.h> 32#include <asm/kdump.h> 33#include <asm/prom.h> 34#include <asm/firmware.h> 35#include <asm/smp.h> 36#include <asm/system.h> 37#include <asm/setjmp.h> 38 39#ifdef DEBUG 40#include <asm/udbg.h> 41#define DBG(fmt...) udbg_printf(fmt) 42#else 43#define DBG(fmt...) 44#endif 45 46/* This keeps a track of which one is crashing cpu. */ 47int crashing_cpu = -1; 48static cpumask_t cpus_in_crash = CPU_MASK_NONE; 49cpumask_t cpus_in_sr = CPU_MASK_NONE; 50 51#define CRASH_HANDLER_MAX 2 52/* NULL terminated list of shutdown handles */ 53static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; 54static DEFINE_SPINLOCK(crash_handlers_lock); 55 56#ifdef CONFIG_SMP 57static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); 58 59void crash_ipi_callback(struct pt_regs *regs) 60{ 61 int cpu = smp_processor_id(); 62 63 if (!cpu_online(cpu)) 64 return; 65 66 hard_irq_disable(); 67 if (!cpu_isset(cpu, cpus_in_crash)) 68 crash_save_cpu(regs, cpu); 69 cpu_set(cpu, cpus_in_crash); 70 71 /* 72 * Entered via soft-reset - could be the kdump 73 * process is invoked using soft-reset or user activated 74 * it if some CPU did not respond to an IPI. 75 * For soft-reset, the secondary CPU can enter this func 76 * twice. 1 - using IPI, and 2. soft-reset. 77 * Tell the kexec CPU that entered via soft-reset and ready 78 * to go down. 79 */ 80 if (cpu_isset(cpu, cpus_in_sr)) { 81 cpu_clear(cpu, cpus_in_sr); 82 atomic_inc(&enter_on_soft_reset); 83 } 84 85 /* 86 * Starting the kdump boot. 87 * This barrier is needed to make sure that all CPUs are stopped. 88 * If not, soft-reset will be invoked to bring other CPUs. 89 */ 90 while (!cpu_isset(crashing_cpu, cpus_in_crash)) 91 cpu_relax(); 92 93 if (ppc_md.kexec_cpu_down) 94 ppc_md.kexec_cpu_down(1, 1); 95 96#ifdef CONFIG_PPC64 97 kexec_smp_wait(); 98#else 99 for (;;); 100#endif 101 102 /* NOTREACHED */ 103} 104 105/* 106 * Wait until all CPUs are entered via soft-reset. 107 */ 108static void crash_soft_reset_check(int cpu) 109{ 110 unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ 111 112 cpu_clear(cpu, cpus_in_sr); 113 while (atomic_read(&enter_on_soft_reset) != ncpus) 114 cpu_relax(); 115} 116 117 118static void crash_kexec_prepare_cpus(int cpu) 119{ 120 unsigned int msecs; 121 122 unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ 123 124 crash_send_ipi(crash_ipi_callback); 125 smp_wmb(); 126 127 printk(KERN_EMERG "Sending IPI to other cpus...\n"); 128 msecs = 10000; 129 while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) { 130 cpu_relax(); 131 mdelay(1); 132 } 133 134 /* Would it be better to replace the trap vector here? */ 135 136 if (cpus_weight(cpus_in_crash) < ncpus) { 137 printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", 138 ncpus - cpus_weight(cpus_in_crash)); 139 printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); 140 cpus_in_sr = CPU_MASK_NONE; 141 atomic_set(&enter_on_soft_reset, 0); 142 while (cpus_weight(cpus_in_crash) < ncpus) 143 cpu_relax(); 144 } 145 /* 146 * Make sure all CPUs are entered via soft-reset if the kdump is 147 * invoked using soft-reset. 148 */ 149 if (cpu_isset(cpu, cpus_in_sr)) 150 crash_soft_reset_check(cpu); 151 /* Leave the IPI callback set */ 152} 153 154/* wait for all the CPUs to hit real mode but timeout if they don't come in */ 155#ifdef CONFIG_PPC_STD_MMU_64 156static void crash_kexec_wait_realmode(int cpu) 157{ 158 unsigned int msecs; 159 int i; 160 161 msecs = 10000; 162 for (i=0; i < NR_CPUS && msecs > 0; i++) { 163 if (i == cpu) 164 continue; 165 166 while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { 167 barrier(); 168 if (!cpu_possible(i)) { 169 break; 170 } 171 if (!cpu_online(i)) { 172 break; 173 } 174 msecs--; 175 mdelay(1); 176 } 177 } 178 mb(); 179} 180#endif 181 182/* 183 * This function will be called by secondary cpus or by kexec cpu 184 * if soft-reset is activated to stop some CPUs. 185 */ 186void crash_kexec_secondary(struct pt_regs *regs) 187{ 188 int cpu = smp_processor_id(); 189 unsigned long flags; 190 int msecs = 5; 191 192 local_irq_save(flags); 193 /* Wait 5ms if the kexec CPU is not entered yet. */ 194 while (crashing_cpu < 0) { 195 if (--msecs < 0) { 196 /* 197 * Either kdump image is not loaded or 198 * kdump process is not started - Probably xmon 199 * exited using 'x'(exit and recover) or 200 * kexec_should_crash() failed for all running tasks. 201 */ 202 cpu_clear(cpu, cpus_in_sr); 203 local_irq_restore(flags); 204 return; 205 } 206 mdelay(1); 207 cpu_relax(); 208 } 209 if (cpu == crashing_cpu) { 210 /* 211 * Panic CPU will enter this func only via soft-reset. 212 * Wait until all secondary CPUs entered and 213 * then start kexec boot. 214 */ 215 crash_soft_reset_check(cpu); 216 cpu_set(crashing_cpu, cpus_in_crash); 217 if (ppc_md.kexec_cpu_down) 218 ppc_md.kexec_cpu_down(1, 0); 219 machine_kexec(kexec_crash_image); 220 /* NOTREACHED */ 221 } 222 crash_ipi_callback(regs); 223} 224 225#else 226static void crash_kexec_prepare_cpus(int cpu) 227{ 228 /* 229 * move the secondarys to us so that we can copy 230 * the new kernel 0-0x100 safely 231 * 232 * do this if kexec in setup.c ? 233 */ 234#ifdef CONFIG_PPC64 235 smp_release_cpus(); 236#else 237#endif 238} 239 240void crash_kexec_secondary(struct pt_regs *regs) 241{ 242 cpus_in_sr = CPU_MASK_NONE; 243} 244#endif 245#ifdef CONFIG_SPU_BASE 246 247#include <asm/spu.h> 248#include <asm/spu_priv1.h> 249 250struct crash_spu_info { 251 struct spu *spu; 252 u32 saved_spu_runcntl_RW; 253 u32 saved_spu_status_R; 254 u32 saved_spu_npc_RW; 255 u64 saved_mfc_sr1_RW; 256 u64 saved_mfc_dar; 257 u64 saved_mfc_dsisr; 258}; 259 260#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ 261static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; 262 263static void crash_kexec_stop_spus(void) 264{ 265 struct spu *spu; 266 int i; 267 u64 tmp; 268 269 for (i = 0; i < CRASH_NUM_SPUS; i++) { 270 if (!crash_spu_info[i].spu) 271 continue; 272 273 spu = crash_spu_info[i].spu; 274 275 crash_spu_info[i].saved_spu_runcntl_RW = 276 in_be32(&spu->problem->spu_runcntl_RW); 277 crash_spu_info[i].saved_spu_status_R = 278 in_be32(&spu->problem->spu_status_R); 279 crash_spu_info[i].saved_spu_npc_RW = 280 in_be32(&spu->problem->spu_npc_RW); 281 282 crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); 283 crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); 284 tmp = spu_mfc_sr1_get(spu); 285 crash_spu_info[i].saved_mfc_sr1_RW = tmp; 286 287 tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; 288 spu_mfc_sr1_set(spu, tmp); 289 290 __delay(200); 291 } 292} 293 294void crash_register_spus(struct list_head *list) 295{ 296 struct spu *spu; 297 298 list_for_each_entry(spu, list, full_list) { 299 if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) 300 continue; 301 302 crash_spu_info[spu->number].spu = spu; 303 } 304} 305 306#else 307static inline void crash_kexec_stop_spus(void) 308{ 309} 310#endif /* CONFIG_SPU_BASE */ 311 312/* 313 * Register a function to be called on shutdown. Only use this if you 314 * can't reset your device in the second kernel. 315 */ 316int crash_shutdown_register(crash_shutdown_t handler) 317{ 318 unsigned int i, rc; 319 320 spin_lock(&crash_handlers_lock); 321 for (i = 0 ; i < CRASH_HANDLER_MAX; i++) 322 if (!crash_shutdown_handles[i]) { 323 /* Insert handle at first empty entry */ 324 crash_shutdown_handles[i] = handler; 325 rc = 0; 326 break; 327 } 328 329 if (i == CRASH_HANDLER_MAX) { 330 printk(KERN_ERR "Crash shutdown handles full, " 331 "not registered.\n"); 332 rc = 1; 333 } 334 335 spin_unlock(&crash_handlers_lock); 336 return rc; 337} 338EXPORT_SYMBOL(crash_shutdown_register); 339 340int crash_shutdown_unregister(crash_shutdown_t handler) 341{ 342 unsigned int i, rc; 343 344 spin_lock(&crash_handlers_lock); 345 for (i = 0 ; i < CRASH_HANDLER_MAX; i++) 346 if (crash_shutdown_handles[i] == handler) 347 break; 348 349 if (i == CRASH_HANDLER_MAX) { 350 printk(KERN_ERR "Crash shutdown handle not found\n"); 351 rc = 1; 352 } else { 353 /* Shift handles down */ 354 for (; crash_shutdown_handles[i]; i++) 355 crash_shutdown_handles[i] = 356 crash_shutdown_handles[i+1]; 357 rc = 0; 358 } 359 360 spin_unlock(&crash_handlers_lock); 361 return rc; 362} 363EXPORT_SYMBOL(crash_shutdown_unregister); 364 365static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; 366static int crash_shutdown_cpu = -1; 367 368static int handle_fault(struct pt_regs *regs) 369{ 370 if (crash_shutdown_cpu == smp_processor_id()) 371 longjmp(crash_shutdown_buf, 1); 372 return 0; 373} 374 375void default_machine_crash_shutdown(struct pt_regs *regs) 376{ 377 unsigned int i; 378 int (*old_handler)(struct pt_regs *regs); 379 380 381 /* 382 * This function is only called after the system 383 * has panicked or is otherwise in a critical state. 384 * The minimum amount of code to allow a kexec'd kernel 385 * to run successfully needs to happen here. 386 * 387 * In practice this means stopping other cpus in 388 * an SMP system. 389 * The kernel is broken so disable interrupts. 390 */ 391 hard_irq_disable(); 392 393 /* 394 * Make a note of crashing cpu. Will be used in machine_kexec 395 * such that another IPI will not be sent. 396 */ 397 crashing_cpu = smp_processor_id(); 398 crash_save_cpu(regs, crashing_cpu); 399 crash_kexec_prepare_cpus(crashing_cpu); 400 cpu_set(crashing_cpu, cpus_in_crash); 401#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) 402 crash_kexec_wait_realmode(crashing_cpu); 403#endif 404 405 for_each_irq(i) { 406 struct irq_desc *desc = irq_to_desc(i); 407 408 if (!desc || !desc->chip || !desc->chip->eoi) 409 continue; 410 411 if (desc->status & IRQ_INPROGRESS) 412 desc->chip->eoi(i); 413 414 if (!(desc->status & IRQ_DISABLED)) 415 desc->chip->shutdown(i); 416 } 417 418 /* 419 * Call registered shutdown routines savely. Swap out 420 * __debugger_fault_handler, and replace on exit. 421 */ 422 old_handler = __debugger_fault_handler; 423 __debugger_fault_handler = handle_fault; 424 crash_shutdown_cpu = smp_processor_id(); 425 for (i = 0; crash_shutdown_handles[i]; i++) { 426 if (setjmp(crash_shutdown_buf) == 0) { 427 /* 428 * Insert syncs and delay to ensure 429 * instructions in the dangerous region don't 430 * leak away from this protected region. 431 */ 432 asm volatile("sync; isync"); 433 /* dangerous region */ 434 crash_shutdown_handles[i](); 435 asm volatile("sync; isync"); 436 } 437 } 438 crash_shutdown_cpu = -1; 439 __debugger_fault_handler = old_handler; 440 441 crash_kexec_stop_spus(); 442 443 if (ppc_md.kexec_cpu_down) 444 ppc_md.kexec_cpu_down(1, 0); 445} 446