1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Architecture specific (PPC64) functions for kexec based crash dumps.
4 *
5 * Copyright (C) 2005, IBM Corp.
6 *
7 * Created by: Haren Myneni
8 */
9
10#include <linux/kernel.h>
11#include <linux/smp.h>
12#include <linux/reboot.h>
13#include <linux/kexec.h>
14#include <linux/export.h>
15#include <linux/crash_dump.h>
16#include <linux/delay.h>
17#include <linux/irq.h>
18#include <linux/types.h>
19
20#include <asm/processor.h>
21#include <asm/machdep.h>
22#include <asm/kexec.h>
23#include <asm/smp.h>
24#include <asm/setjmp.h>
25#include <asm/debug.h>
26#include <asm/interrupt.h>
27
28/*
29 * The primary CPU waits a while for all secondary CPUs to enter. This is to
30 * avoid sending an IPI if the secondary CPUs are entering
31 * crash_kexec_secondary on their own (eg via a system reset).
32 *
33 * The secondary timeout has to be longer than the primary. Both timeouts are
34 * in milliseconds.
35 */
#define PRIMARY_TIMEOUT		500
#define SECONDARY_TIMEOUT	1000

/*
 * IPI_TIMEOUT bounds the number of 1 ms polling steps
 * crash_kexec_prepare_cpus() spends waiting for the other CPUs to check in.
 * REAL_MODE_TIMEOUT is the total number of 1 ms polling steps
 * crash_kexec_wait_realmode() may spend across all CPUs.
 */
#define IPI_TIMEOUT		10000
#define REAL_MODE_TIMEOUT	10000

/*
 * Set to 1 by default_machine_crash_shutdown(). CPUs parked in
 * crash_ipi_callback() spin until it becomes non-zero.
 */
static int time_to_dump;

/*
 * In case of system reset, secondary CPUs enter crash_kexec_secondary()
 * without an IPI having to be sent explicitly. So, record whether the crash
 * came in via system reset, to avoid sending another IPI.
 */
static int is_via_system_reset;

/*
 * crash_wake_offline should be set to 1 by platforms that intend to wake
 * up offline cpus prior to jumping to a kdump kernel. Currently powernv
 * sets it to 1, since we want to avoid problems when an offline CPU wakes
 * up due to something like an HMI (malfunction error), which propagates
 * to all threads.
 */
int crash_wake_offline;

#define CRASH_HANDLER_MAX 3
/*
 * Table of registered shutdown handlers. crash_shutdown_register() fills the
 * first empty slot and crash_shutdown_unregister() shifts later entries down,
 * so the first NULL entry marks the end of the list. Both take
 * crash_handlers_lock while modifying the table.
 */
static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
static DEFINE_SPINLOCK(crash_handlers_lock);

/*
 * setjmp() context used by handle_fault(), and the CPU that armed it.
 * crash_shutdown_cpu is -1 whenever no recovery point is armed.
 */
static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
static int crash_shutdown_cpu = -1;
67
68static int handle_fault(struct pt_regs *regs)
69{
70	if (crash_shutdown_cpu == smp_processor_id())
71		longjmp(crash_shutdown_buf, 1);
72	return 0;
73}
74
75#ifdef CONFIG_SMP
76
77static atomic_t cpus_in_crash;
78void crash_ipi_callback(struct pt_regs *regs)
79{
80	static cpumask_t cpus_state_saved = CPU_MASK_NONE;
81
82	int cpu = smp_processor_id();
83
84	hard_irq_disable();
85	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
86		crash_save_cpu(regs, cpu);
87		cpumask_set_cpu(cpu, &cpus_state_saved);
88	}
89
90	atomic_inc(&cpus_in_crash);
91	smp_mb__after_atomic();
92
93	/*
94	 * Starting the kdump boot.
95	 * This barrier is needed to make sure that all CPUs are stopped.
96	 */
97	while (!time_to_dump)
98		cpu_relax();
99
100	if (ppc_md.kexec_cpu_down)
101		ppc_md.kexec_cpu_down(1, 1);
102
103#ifdef CONFIG_PPC64
104	kexec_smp_wait();
105#else
106	for (;;);	/* FIXME */
107#endif
108
109	/* NOTREACHED */
110}
111
112static void crash_kexec_prepare_cpus(void)
113{
114	unsigned int msecs;
115	volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
116	volatile int tries = 0;
117	int (*old_handler)(struct pt_regs *regs);
118
119	printk(KERN_EMERG "Sending IPI to other CPUs\n");
120
121	if (crash_wake_offline)
122		ncpus = num_present_cpus() - 1;
123
124	/*
125	 * If we came in via system reset, secondaries enter via crash_kexec_secondary().
126	 * So, wait a while for the secondary CPUs to enter for that case.
127	 * Else, send IPI to all other CPUs.
128	 */
129	if (is_via_system_reset)
130		mdelay(PRIMARY_TIMEOUT);
131	else
132		crash_send_ipi(crash_ipi_callback);
133	smp_wmb();
134
135again:
136	/*
137	 * FIXME: Until we will have the way to stop other CPUs reliably,
138	 * the crash CPU will send an IPI and wait for other CPUs to
139	 * respond.
140	 */
141	msecs = IPI_TIMEOUT;
142	while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
143		mdelay(1);
144
145	/* Would it be better to replace the trap vector here? */
146
147	if (atomic_read(&cpus_in_crash) >= ncpus) {
148		printk(KERN_EMERG "IPI complete\n");
149		return;
150	}
151
152	printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
153		ncpus - atomic_read(&cpus_in_crash));
154
155	/*
156	 * If we have a panic timeout set then we can't wait indefinitely
157	 * for someone to activate system reset. We also give up on the
158	 * second time through if system reset fail to work.
159	 */
160	if ((panic_timeout > 0) || (tries > 0))
161		return;
162
163	/*
164	 * A system reset will cause all CPUs to take an 0x100 exception.
165	 * The primary CPU returns here via setjmp, and the secondary
166	 * CPUs reexecute the crash_kexec_secondary path.
167	 */
168	old_handler = __debugger;
169	__debugger = handle_fault;
170	crash_shutdown_cpu = smp_processor_id();
171
172	if (setjmp(crash_shutdown_buf) == 0) {
173		printk(KERN_EMERG "Activate system reset (dumprestart) "
174				  "to stop other cpu(s)\n");
175
176		/*
177		 * A system reset will force all CPUs to execute the
178		 * crash code again. We need to reset cpus_in_crash so we
179		 * wait for everyone to do this.
180		 */
181		atomic_set(&cpus_in_crash, 0);
182		smp_mb();
183
184		while (atomic_read(&cpus_in_crash) < ncpus)
185			cpu_relax();
186	}
187
188	crash_shutdown_cpu = -1;
189	__debugger = old_handler;
190
191	tries++;
192	goto again;
193}
194
195/*
196 * This function will be called by secondary cpus.
197 */
198void crash_kexec_secondary(struct pt_regs *regs)
199{
200	unsigned long flags;
201	int msecs = SECONDARY_TIMEOUT;
202
203	local_irq_save(flags);
204
205	/* Wait for the primary crash CPU to signal its progress */
206	while (crashing_cpu < 0) {
207		if (--msecs < 0) {
208			/* No response, kdump image may not have been loaded */
209			local_irq_restore(flags);
210			return;
211		}
212
213		mdelay(1);
214	}
215
216	crash_ipi_callback(regs);
217}
218
219#else	/* ! CONFIG_SMP */
220
/*
 * Variant built when CONFIG_SMP is not set: no IPI is sent and nothing is
 * waited for. On PPC64 it only calls smp_release_cpus(); on other
 * configurations it does nothing.
 */
static void crash_kexec_prepare_cpus(void)
{
	/*
	 * move the secondaries to us so that we can copy
	 * the new kernel 0-0x100 safely
	 *
	 * do this if kexec in setup.c ?
	 */
#ifdef CONFIG_PPC64
	smp_release_cpus();
#else
	/* FIXME */
#endif
}
235
/* Variant built when CONFIG_SMP is not set: does nothing. */
void crash_kexec_secondary(struct pt_regs *regs)
{
}
239#endif	/* CONFIG_SMP */
240
/*
 * Wait for all the other CPUs to hit real mode, but time out if they
 * don't come in.
 *
 * @cpu: CPU to skip (the caller passes the crashing CPU).
 *
 * The REAL_MODE_TIMEOUT budget is shared across all CPUs and consumed in
 * 1 ms steps. A CPU that is not online is not waited for. On builds without
 * CONFIG_SMP && CONFIG_PPC64 this is an empty stub.
 */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
	unsigned int msecs;
	int i;

	msecs = REAL_MODE_TIMEOUT;
	for (i = 0; i < nr_cpu_ids && msecs > 0; i++) {
		/*
		 * Skip ourselves, and skip ids that are not possible CPUs
		 * *before* dereferencing paca_ptrs[i]: nothing visible here
		 * guarantees a valid paca pointer for a non-possible id
		 * (NOTE(review): confirm against paca allocation).
		 */
		if (i == cpu || !cpu_possible(i))
			continue;

		while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
			/* Force kexec_state to be re-read on every pass */
			barrier();
			if (!cpu_online(i) || msecs == 0)
				break;
			msecs--;
			mdelay(1);
		}
	}
	mb();
}
#else
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif	/* CONFIG_SMP && CONFIG_PPC64 */
266
/*
 * First step of crash handling on the crashing CPU: switch printk to
 * deferred mode, hard-disable interrupts, record this CPU in crashing_cpu
 * and then stop the other CPUs via crash_kexec_prepare_cpus().
 */
void crash_kexec_prepare(void)
{
	/* Avoid hard-locking on logbuf_lock held by an unresponsive CPU */
	printk_deferred_enter();

	/*
	 * This function is only called after the system
	 * has panicked or is otherwise in a critical state.
	 * The minimum amount of code to allow a kexec'd kernel
	 * to run successfully needs to happen here.
	 *
	 * In practice this means stopping other cpus in
	 * an SMP system.
	 * The kernel is broken so disable interrupts.
	 */
	hard_irq_disable();

	/*
	 * Make a note of crashing cpu. Will be used in machine_kexec
	 * such that another IPI will not be sent.
	 * Secondaries in crash_kexec_secondary() also poll this value.
	 */
	crashing_cpu = smp_processor_id();

	crash_kexec_prepare_cpus();
}
292
293/*
294 * Register a function to be called on shutdown.  Only use this if you
295 * can't reset your device in the second kernel.
296 */
297int crash_shutdown_register(crash_shutdown_t handler)
298{
299	unsigned int i, rc;
300
301	spin_lock(&crash_handlers_lock);
302	for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
303		if (!crash_shutdown_handles[i]) {
304			/* Insert handle at first empty entry */
305			crash_shutdown_handles[i] = handler;
306			rc = 0;
307			break;
308		}
309
310	if (i == CRASH_HANDLER_MAX) {
311		printk(KERN_ERR "Crash shutdown handles full, "
312		       "not registered.\n");
313		rc = 1;
314	}
315
316	spin_unlock(&crash_handlers_lock);
317	return rc;
318}
319EXPORT_SYMBOL(crash_shutdown_register);
320
321int crash_shutdown_unregister(crash_shutdown_t handler)
322{
323	unsigned int i, rc;
324
325	spin_lock(&crash_handlers_lock);
326	for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
327		if (crash_shutdown_handles[i] == handler)
328			break;
329
330	if (i == CRASH_HANDLER_MAX) {
331		printk(KERN_ERR "Crash shutdown handle not found\n");
332		rc = 1;
333	} else {
334		/* Shift handles down */
335		for (; i < (CRASH_HANDLER_MAX - 1); i++)
336			crash_shutdown_handles[i] =
337				crash_shutdown_handles[i+1];
338		/*
339		 * Reset last entry to NULL now that it has been shifted down,
340		 * this will allow new handles to be added here.
341		 */
342		crash_shutdown_handles[i] = NULL;
343		rc = 0;
344	}
345
346	spin_unlock(&crash_handlers_lock);
347	return rc;
348}
349EXPORT_SYMBOL(crash_shutdown_unregister);
350
/*
 * Default crash shutdown sequence, run on the crashing CPU.
 *
 * Notes whether the crash came in via system reset, calls
 * crash_smp_send_stop(), saves the crashing CPU's registers, releases the
 * CPUs spinning in crash_ipi_callback() by setting time_to_dump, waits for
 * them to reach real mode, masks interrupts, runs the registered shutdown
 * handlers and finally calls the platform kexec_cpu_down hook if one is set.
 *
 * @regs: register state of the crashing CPU.
 */
void default_machine_crash_shutdown(struct pt_regs *regs)
{
	/*
	 * NOTE(review): i is volatile, presumably so that its value stays
	 * well-defined when handle_fault() longjmp()s back into the loop below.
	 */
	volatile unsigned int i;
	int (*old_handler)(struct pt_regs *regs);

	/* Remember a system-reset entry so no IPI is sent later on */
	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
		is_via_system_reset = 1;

	crash_smp_send_stop();

	crash_save_cpu(regs, crashing_cpu);

	/* Release the CPUs waiting in crash_ipi_callback() */
	time_to_dump = 1;

	crash_kexec_wait_realmode(crashing_cpu);

	machine_kexec_mask_interrupts();

	/*
	 * Call registered shutdown routines safely.  Swap out
	 * __debugger_fault_handler, and replace on exit.
	 */
	old_handler = __debugger_fault_handler;
	__debugger_fault_handler = handle_fault;
	crash_shutdown_cpu = smp_processor_id();
	/* The first NULL entry ends the list of registered handlers */
	for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
		/*
		 * If handle_fault() longjmp()s back here, setjmp() returns
		 * non-zero, the handler is not re-run and the loop moves on
		 * to the next one.
		 */
		if (setjmp(crash_shutdown_buf) == 0) {
			/*
			 * Insert syncs and delay to ensure
			 * instructions in the dangerous region don't
			 * leak away from this protected region.
			 */
			asm volatile("sync; isync");
			/* dangerous region */
			crash_shutdown_handles[i]();
			asm volatile("sync; isync");
		}
	}
	/* Disarm the recovery point and restore the previous fault handler */
	crash_shutdown_cpu = -1;
	__debugger_fault_handler = old_handler;

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 0);
}
395