1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2001 Dave Engebretsen IBM Corporation
4 */
5
6#include <linux/sched.h>
7#include <linux/interrupt.h>
8#include <linux/irq.h>
9#include <linux/of.h>
10#include <linux/fs.h>
11#include <linux/reboot.h>
12#include <linux/irq_work.h>
13
14#include <asm/machdep.h>
15#include <asm/rtas.h>
16#include <asm/firmware.h>
17#include <asm/mce.h>
18
19#include "pseries.h"
20
/*
 * Buffer whose physical address is handed to the RTAS check-exception call
 * by the interrupt handlers in this file. The handlers take
 * ras_log_buf_lock around every use of the buffer.
 */
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);

/* RTAS token for check-exception, looked up in init_ras_IRQ(). */
static int ras_check_exception_token;

/* Sensor token and index passed to rtas_get_sensor_fast() on EPOW interrupts */
#define EPOW_SENSOR_TOKEN	9
#define EPOW_SENSOR_INDEX	0

/* EPOW events counter variable */
static int num_epow_events;

/* Interrupt handlers registered by the initcalls in this file. */
static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
35
/*
 * RTAS pseries MCE errorlog section.
 *
 * mce_handle_error() casts the data of the PSERIES_ELOG_SECT_ID_MCE section
 * to this layout. Multi-byte members are big-endian (__be32/__be64) and the
 * struct is packed.
 */
struct pseries_mc_errorlog {
	__be32	fru_id;
	__be32	proc_id;
	u8	error_type;	/* compared against MC_ERROR_TYPE_* */
	/*
	 * sub_err_type (1 byte). Bit fields depend on error_type; use
	 * rtas_mc_error_sub_type() to extract the "type of error" field.
	 *
	 *   MSB0
	 *   |
	 *   V
	 *   01234567
	 *   XXXXXXXX
	 *
	 * For error_type == MC_ERROR_TYPE_UE
	 *   XXXXXXXX
	 *   X		1: Permanent or Transient UE.
	 *    X		1: Effective address provided.
	 *     X	1: Logical address provided.
	 *      XX	2: Reserved.
	 *        XXX	3: Type of UE error.
	 *
	 * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
	 *   XXXXXXXX
	 *   X		1: Effective address provided.
	 *    XXXXX	5: Reserved.
	 *         XX	2: Type of SLB/ERAT/TLB error.
	 *
	 * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
	 *   XXXXXXXX
	 *   X		1: Error causing address provided.
	 *    XXX	3: Type of error.
	 *       XXXX	4: Reserved.
	 */
	u8	sub_err_type;
	u8	reserved_1[6];
	/* The two addresses are only read when the matching "provided" bit is set */
	__be64	effective_address;
	__be64	logical_address;
} __packed;
75
/* RTAS pseries MCE error types (values of pseries_mc_errorlog.error_type) */
#define MC_ERROR_TYPE_UE		0x00
#define MC_ERROR_TYPE_SLB		0x01
#define MC_ERROR_TYPE_ERAT		0x02
#define MC_ERROR_TYPE_UNKNOWN		0x03
#define MC_ERROR_TYPE_TLB		0x04
#define MC_ERROR_TYPE_D_CACHE		0x05
#define MC_ERROR_TYPE_I_CACHE		0x07
#define MC_ERROR_TYPE_CTRL_MEM_ACCESS	0x08

/*
 * RTAS pseries MCE error sub types.
 *
 * These are compared against the value returned by rtas_mc_error_sub_type(),
 * i.e. the "type of error" bit field after masking/shifting, not against the
 * raw sub_err_type byte.
 */
#define MC_ERROR_UE_INDETERMINATE		0
#define MC_ERROR_UE_IFETCH			1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH	2
#define MC_ERROR_UE_LOAD_STORE			3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE	4

/*
 * "Address provided" flags. Unlike the sub types above, these are tested
 * directly against the raw sub_err_type byte. The UE_* flags apply to
 * MC_ERROR_TYPE_UE; MC_EFFECTIVE_ADDR_PROVIDED is used for the SLB, ERAT,
 * TLB and CTRL_MEM_ACCESS types.
 */
#define UE_EFFECTIVE_ADDR_PROVIDED		0x40
#define UE_LOGICAL_ADDR_PROVIDED		0x20
#define MC_EFFECTIVE_ADDR_PROVIDED		0x80

/* Note: the SLB sub types start at 0, the ERAT and TLB ones start at 1. */
#define MC_ERROR_SLB_PARITY		0
#define MC_ERROR_SLB_MULTIHIT		1
#define MC_ERROR_SLB_INDETERMINATE	2

#define MC_ERROR_ERAT_PARITY		1
#define MC_ERROR_ERAT_MULTIHIT		2
#define MC_ERROR_ERAT_INDETERMINATE	3

#define MC_ERROR_TLB_PARITY		1
#define MC_ERROR_TLB_MULTIHIT		2
#define MC_ERROR_TLB_INDETERMINATE	3

#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK	0
#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS	1
111
112static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
113{
114	switch (mlog->error_type) {
115	case	MC_ERROR_TYPE_UE:
116		return (mlog->sub_err_type & 0x07);
117	case	MC_ERROR_TYPE_SLB:
118	case	MC_ERROR_TYPE_ERAT:
119	case	MC_ERROR_TYPE_TLB:
120		return (mlog->sub_err_type & 0x03);
121	case	MC_ERROR_TYPE_CTRL_MEM_ACCESS:
122		return (mlog->sub_err_type & 0x70) >> 4;
123	default:
124		return 0;
125	}
126}
127
128/*
129 * Enable the hotplug interrupt late because processing them may touch other
130 * devices or systems (e.g. hugepages) that have not been initialized at the
131 * subsys stage.
132 */
133static int __init init_ras_hotplug_IRQ(void)
134{
135	struct device_node *np;
136
137	/* Hotplug Events */
138	np = of_find_node_by_path("/event-sources/hot-plug-events");
139	if (np != NULL) {
140		if (dlpar_workqueue_init() == 0)
141			request_event_sources_irqs(np, ras_hotplug_interrupt,
142						   "RAS_HOTPLUG");
143		of_node_put(np);
144	}
145
146	return 0;
147}
148machine_late_initcall(pseries, init_ras_hotplug_IRQ);
149
150/*
151 * Initialize handlers for the set of interrupts caused by hardware errors
152 * and power system events.
153 */
154static int __init init_ras_IRQ(void)
155{
156	struct device_node *np;
157
158	ras_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION);
159
160	/* Internal Errors */
161	np = of_find_node_by_path("/event-sources/internal-errors");
162	if (np != NULL) {
163		request_event_sources_irqs(np, ras_error_interrupt,
164					   "RAS_ERROR");
165		of_node_put(np);
166	}
167
168	/* EPOW Events */
169	np = of_find_node_by_path("/event-sources/epow-events");
170	if (np != NULL) {
171		request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
172		of_node_put(np);
173	}
174
175	return 0;
176}
177machine_subsys_initcall(pseries, init_ras_IRQ);
178
179#define EPOW_SHUTDOWN_NORMAL				1
180#define EPOW_SHUTDOWN_ON_UPS				2
181#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
182#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4
183
184static void handle_system_shutdown(char event_modifier)
185{
186	switch (event_modifier) {
187	case EPOW_SHUTDOWN_NORMAL:
188		pr_emerg("Power off requested\n");
189		orderly_poweroff(true);
190		break;
191
192	case EPOW_SHUTDOWN_ON_UPS:
193		pr_emerg("Loss of system power detected. System is running on"
194			 " UPS/battery. Check RTAS error log for details\n");
195		break;
196
197	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
198		pr_emerg("Loss of system critical functions detected. Check"
199			 " RTAS error log for details\n");
200		orderly_poweroff(true);
201		break;
202
203	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
204		pr_emerg("High ambient temperature detected. Check RTAS"
205			 " error log for details\n");
206		orderly_poweroff(true);
207		break;
208
209	default:
210		pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
211			event_modifier);
212	}
213}
214
/*
 * Layout that rtas_parse_epow_errlog() applies to the data of the
 * PSERIES_ELOG_SECT_ID_EPOW section of an RTAS error log.
 */
struct epow_errorlog {
	unsigned char sensor_value;	/* low 4 bits: EPOW action code */
	unsigned char event_modifier;	/* low 4 bits: shutdown modifier */
	unsigned char extended_modifier;
	unsigned char reserved;
	unsigned char platform_reason;
};

/* EPOW action codes (low 4 bits of epow_errorlog.sensor_value) */
#define EPOW_RESET			0
#define EPOW_WARN_COOLING		1
#define EPOW_WARN_POWER			2
#define EPOW_SYSTEM_SHUTDOWN		3
#define EPOW_SYSTEM_HALT		4
#define EPOW_MAIN_ENCLOSURE		5
#define EPOW_POWER_OFF			7
230
231static void rtas_parse_epow_errlog(struct rtas_error_log *log)
232{
233	struct pseries_errorlog *pseries_log;
234	struct epow_errorlog *epow_log;
235	char action_code;
236	char modifier;
237
238	pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
239	if (pseries_log == NULL)
240		return;
241
242	epow_log = (struct epow_errorlog *)pseries_log->data;
243	action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */
244	modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */
245
246	switch (action_code) {
247	case EPOW_RESET:
248		if (num_epow_events) {
249			pr_info("Non critical power/cooling issue cleared\n");
250			num_epow_events--;
251		}
252		break;
253
254	case EPOW_WARN_COOLING:
255		pr_info("Non-critical cooling issue detected. Check RTAS error"
256			" log for details\n");
257		break;
258
259	case EPOW_WARN_POWER:
260		pr_info("Non-critical power issue detected. Check RTAS error"
261			" log for details\n");
262		break;
263
264	case EPOW_SYSTEM_SHUTDOWN:
265		handle_system_shutdown(modifier);
266		break;
267
268	case EPOW_SYSTEM_HALT:
269		pr_emerg("Critical power/cooling issue detected. Check RTAS"
270			 " error log for details. Powering off.\n");
271		orderly_poweroff(true);
272		break;
273
274	case EPOW_MAIN_ENCLOSURE:
275	case EPOW_POWER_OFF:
276		pr_emerg("System about to lose power. Check RTAS error log "
277			 " for details. Powering off immediately.\n");
278		emergency_sync();
279		kernel_power_off();
280		break;
281
282	default:
283		pr_err("Unknown power/cooling event (action code  = %d)\n",
284			action_code);
285	}
286
287	/* Increment epow events counter variable */
288	if (action_code != EPOW_RESET)
289		num_epow_events++;
290}
291
292static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
293{
294	struct pseries_errorlog *pseries_log;
295	struct pseries_hp_errorlog *hp_elog;
296
297	spin_lock(&ras_log_buf_lock);
298
299	rtas_call(ras_check_exception_token, 6, 1, NULL,
300		  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
301		  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
302		  rtas_get_error_log_max());
303
304	pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
305					   PSERIES_ELOG_SECT_ID_HOTPLUG);
306	hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
307
308	/*
309	 * Since PCI hotplug is not currently supported on pseries, put PCI
310	 * hotplug events on the ras_log_buf to be handled by rtas_errd.
311	 */
312	if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
313	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
314	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
315		queue_hotplug_event(hp_elog);
316	else
317		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
318
319	spin_unlock(&ras_log_buf_lock);
320	return IRQ_HANDLED;
321}
322
323/* Handle environmental and power warning (EPOW) interrupts. */
324static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
325{
326	int state;
327	int critical;
328
329	rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
330
331	if (state > 3)
332		critical = 1;		/* Time Critical */
333	else
334		critical = 0;
335
336	spin_lock(&ras_log_buf_lock);
337
338	rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT,
339		  virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf),
340		  rtas_get_error_log_max());
341
342	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
343
344	rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
345
346	spin_unlock(&ras_log_buf_lock);
347	return IRQ_HANDLED;
348}
349
350/*
351 * Handle hardware error interrupts.
352 *
353 * RTAS check-exception is called to collect data on the exception.  If
354 * the error is deemed recoverable, we log a warning and return.
355 * For nonrecoverable errors, an error is logged and we stop all processing
356 * as quickly as possible in order to prevent propagation of the failure.
357 */
358static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
359{
360	struct rtas_error_log *rtas_elog;
361	int status;
362	int fatal;
363
364	spin_lock(&ras_log_buf_lock);
365
366	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
367			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
368			   virq_to_hw(irq),
369			   RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
370			   __pa(&ras_log_buf),
371				rtas_get_error_log_max());
372
373	rtas_elog = (struct rtas_error_log *)ras_log_buf;
374
375	if (status == 0 &&
376	    rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
377		fatal = 1;
378	else
379		fatal = 0;
380
381	/* format and print the extended information */
382	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
383
384	if (fatal) {
385		pr_emerg("Fatal hardware error detected. Check RTAS error"
386			 " log for details. Powering off immediately\n");
387		emergency_sync();
388		kernel_power_off();
389	} else {
390		pr_err("Recoverable hardware error detected\n");
391	}
392
393	spin_unlock(&ras_log_buf_lock);
394	return IRQ_HANDLED;
395}
396
/*
 * Some versions of FWNMI place the buffer inside the 4kB page starting at
 * 0x7000. Other versions place it inside the rtas buffer. We check both.
 * Minimum size of the buffer is 16 bytes.
 *
 * Evaluates to true when address A leaves room for at least 16 bytes inside
 * either [0x7000, 0x8000) or [rtas.base, rtas.base + rtas.size).
 * Note that A is evaluated several times, so it must be free of side effects.
 */
#define VALID_FWNMI_BUFFER(A) \
	((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
	(((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
405
406static inline struct rtas_error_log *fwnmi_get_errlog(void)
407{
408	return (struct rtas_error_log *)local_paca->mce_data_buf;
409}
410
411static __be64 *fwnmi_get_savep(struct pt_regs *regs)
412{
413	unsigned long savep_ra;
414
415	/* Mask top two bits */
416	savep_ra = regs->gpr[3] & ~(0x3UL << 62);
417	if (!VALID_FWNMI_BUFFER(savep_ra)) {
418		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
419		return NULL;
420	}
421
422	return __va(savep_ra);
423}
424
425/*
426 * Get the error information for errors coming through the
427 * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
428 * the actual r3 if possible, and a ptr to the error log entry
429 * will be returned if found.
430 *
431 * Use one buffer mce_data_buf per cpu to store RTAS error.
432 *
433 * The mce_data_buf does not have any locks or protection around it,
434 * if a second machine check comes in, or a system reset is done
435 * before we have logged the error, then we will get corruption in the
436 * error log.  This is preferable over holding off on calling
437 * ibm,nmi-interlock which would result in us checkstopping if a
438 * second machine check did come in.
439 */
440static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
441{
442	struct rtas_error_log *h;
443	__be64 *savep;
444
445	savep = fwnmi_get_savep(regs);
446	if (!savep)
447		return NULL;
448
449	regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
450
451	h = (struct rtas_error_log *)&savep[1];
452	/* Use the per cpu buffer from paca to store rtas error log */
453	memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
454	if (!rtas_error_extended(h)) {
455		memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
456	} else {
457		int len, error_log_length;
458
459		error_log_length = 8 + rtas_error_extended_log_length(h);
460		len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
461		memcpy(local_paca->mce_data_buf, h, len);
462	}
463
464	return (struct rtas_error_log *)local_paca->mce_data_buf;
465}
466
467/* Call this when done with the data returned by FWNMI_get_errinfo.
468 * It will release the saved data area for other CPUs in the
469 * partition to receive FWNMI errors.
470 */
471static void fwnmi_release_errinfo(void)
472{
473	struct rtas_args rtas_args;
474	int ret;
475
476	/*
477	 * On pseries, the machine check stack is limited to under 4GB, so
478	 * args can be on-stack.
479	 */
480	rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
481	ret = be32_to_cpu(rtas_args.rets[0]);
482	if (ret != 0)
483		printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
484}
485
486int pSeries_system_reset_exception(struct pt_regs *regs)
487{
488#ifdef __LITTLE_ENDIAN__
489	/*
490	 * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
491	 * to detect the bad SRR1 pattern here. Flip the NIP back to correct
492	 * endian for reporting purposes. Unfortunately the MSR can't be fixed,
493	 * so clear it. It will be missing MSR_RI so we won't try to recover.
494	 */
495	if ((be64_to_cpu(regs->msr) &
496			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
497			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
498		regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
499		regs_set_return_msr(regs, 0);
500	}
501#endif
502
503	if (fwnmi_active) {
504		__be64 *savep;
505
506		/*
507		 * Firmware (PowerVM and KVM) saves r3 to a save area like
508		 * machine check, which is not exactly what PAPR (2.9)
509		 * suggests but there is no way to detect otherwise, so this
510		 * is the interface now.
511		 *
512		 * System resets do not save any error log or require an
513		 * "ibm,nmi-interlock" rtas call to release.
514		 */
515
516		savep = fwnmi_get_savep(regs);
517		if (savep)
518			regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
519	}
520
521	if (smp_handle_nmi_ipi(regs))
522		return 1;
523
524	return 0; /* need to perform reset */
525}
526
527static int mce_handle_err_realmode(int disposition, u8 error_type)
528{
529#ifdef CONFIG_PPC_BOOK3S_64
530	if (disposition == RTAS_DISP_NOT_RECOVERED) {
531		switch (error_type) {
532		case	MC_ERROR_TYPE_ERAT:
533			flush_erat();
534			disposition = RTAS_DISP_FULLY_RECOVERED;
535			break;
536		case	MC_ERROR_TYPE_SLB:
537#ifdef CONFIG_PPC_64S_HASH_MMU
538			/*
539			 * Store the old slb content in paca before flushing.
540			 * Print this when we go to virtual mode.
541			 * There are chances that we may hit MCE again if there
542			 * is a parity error on the SLB entry we trying to read
543			 * for saving. Hence limit the slb saving to single
544			 * level of recursion.
545			 */
546			if (local_paca->in_mce == 1)
547				slb_save_contents(local_paca->mce_faulty_slbs);
548			flush_and_reload_slb();
549			disposition = RTAS_DISP_FULLY_RECOVERED;
550#endif
551			break;
552		default:
553			break;
554		}
555	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
556		/* Platform corrected itself but could be degraded */
557		pr_err("MCE: limited recovery, system may be degraded\n");
558		disposition = RTAS_DISP_FULLY_RECOVERED;
559	}
560#endif
561	return disposition;
562}
563
/*
 * Translate an RTAS machine check log into a struct mce_error_info and
 * record it with save_mce_event().
 *
 * @regs:	 interrupted register state; regs->nip is saved with the event
 * @errp:	 RTAS error log, source of the initiator and severity
 * @mce_log:	 MCE section of the log, or NULL if there is none
 * @disposition: RTAS disposition determined so far
 *
 * With a NULL @mce_log the event is saved with an all-zero mce_error_info
 * and zero addresses. Otherwise initiator, severity, error type/sub type and
 * the effective/physical addresses (when the log provides them) are filled
 * in first.
 *
 * Returns @disposition, changed to RTAS_DISP_FULLY_RECOVERED only when a UE
 * error ends up with mce_err.ignore_event set.
 */
static int mce_handle_err_virtmode(struct pt_regs *regs,
				   struct rtas_error_log *errp,
				   struct pseries_mc_errorlog *mce_log,
				   int disposition)
{
	struct mce_error_info mce_err = { 0 };
	int initiator = rtas_error_initiator(errp);
	int severity = rtas_error_severity(errp);
	unsigned long eaddr = 0, paddr = 0;
	u8 error_type, err_sub_type;

	if (!mce_log)
		goto out;

	error_type = mce_log->error_type;
	err_sub_type = rtas_mc_error_sub_type(mce_log);

	/* Map the RTAS initiator onto the MCE one; unmapped values are "unknown" */
	if (initiator == RTAS_INITIATOR_UNKNOWN)
		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
	else if (initiator == RTAS_INITIATOR_CPU)
		mce_err.initiator = MCE_INITIATOR_CPU;
	else if (initiator == RTAS_INITIATOR_PCI)
		mce_err.initiator = MCE_INITIATOR_PCI;
	else if (initiator == RTAS_INITIATOR_ISA)
		mce_err.initiator = MCE_INITIATOR_ISA;
	else if (initiator == RTAS_INITIATOR_MEMORY)
		mce_err.initiator = MCE_INITIATOR_MEMORY;
	else if (initiator == RTAS_INITIATOR_POWERMGM)
		mce_err.initiator = MCE_INITIATOR_POWERMGM;
	else
		mce_err.initiator = MCE_INITIATOR_UNKNOWN;

	/* Map the RTAS severity; anything not listed here is treated as fatal */
	if (severity == RTAS_SEVERITY_NO_ERROR)
		mce_err.severity = MCE_SEV_NO_ERROR;
	else if (severity == RTAS_SEVERITY_EVENT)
		mce_err.severity = MCE_SEV_WARNING;
	else if (severity == RTAS_SEVERITY_WARNING)
		mce_err.severity = MCE_SEV_WARNING;
	else if (severity == RTAS_SEVERITY_ERROR_SYNC)
		mce_err.severity = MCE_SEV_SEVERE;
	else if (severity == RTAS_SEVERITY_ERROR)
		mce_err.severity = MCE_SEV_SEVERE;
	else
		mce_err.severity = MCE_SEV_FATAL;

	/* Severities up to and including ERROR_SYNC are flagged as synchronous */
	if (severity <= RTAS_SEVERITY_ERROR_SYNC)
		mce_err.sync_error = true;
	else
		mce_err.sync_error = false;

	mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
	mce_err.error_class = MCE_ECLASS_UNKNOWN;

	switch (error_type) {
	case MC_ERROR_TYPE_UE:
		mce_err.error_type = MCE_ERROR_TYPE_UE;
		/* NOTE(review): ignore_event is presumably set in here - confirm */
		mce_common_process_ue(regs, &mce_err);
		if (mce_err.ignore_event)
			disposition = RTAS_DISP_FULLY_RECOVERED;
		switch (err_sub_type) {
		case MC_ERROR_UE_IFETCH:
			mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
			break;
		case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
			break;
		case MC_ERROR_UE_LOAD_STORE:
			mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
			break;
		case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
			break;
		case MC_ERROR_UE_INDETERMINATE:
		default:
			mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
			break;
		}
		if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);

		/*
		 * Prefer the logical address from the log as the physical
		 * address; otherwise derive it from the effective address,
		 * leaving paddr at 0 if the lookup returns ULONG_MAX.
		 */
		if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
			paddr = be64_to_cpu(mce_log->logical_address);
		} else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
			unsigned long pfn;

			pfn = addr_to_pfn(regs, eaddr);
			if (pfn != ULONG_MAX)
				paddr = pfn << PAGE_SHIFT;
		}

		break;
	case MC_ERROR_TYPE_SLB:
		mce_err.error_type = MCE_ERROR_TYPE_SLB;
		switch (err_sub_type) {
		case MC_ERROR_SLB_PARITY:
			mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
			break;
		case MC_ERROR_SLB_MULTIHIT:
			mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
			break;
		case MC_ERROR_SLB_INDETERMINATE:
		default:
			mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
			break;
		}
		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);
		break;
	case MC_ERROR_TYPE_ERAT:
		mce_err.error_type = MCE_ERROR_TYPE_ERAT;
		switch (err_sub_type) {
		case MC_ERROR_ERAT_PARITY:
			mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
			break;
		case MC_ERROR_ERAT_MULTIHIT:
			mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
			break;
		case MC_ERROR_ERAT_INDETERMINATE:
		default:
			mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
			break;
		}
		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);
		break;
	case MC_ERROR_TYPE_TLB:
		mce_err.error_type = MCE_ERROR_TYPE_TLB;
		switch (err_sub_type) {
		case MC_ERROR_TLB_PARITY:
			mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
			break;
		case MC_ERROR_TLB_MULTIHIT:
			mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
			break;
		case MC_ERROR_TLB_INDETERMINATE:
		default:
			mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
			break;
		}
		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);
		break;
	case MC_ERROR_TYPE_D_CACHE:
		mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
		break;
	case MC_ERROR_TYPE_I_CACHE:
		mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
		break;
	case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
		mce_err.error_type = MCE_ERROR_TYPE_RA;
		/*
		 * No default case: for any other sub type mce_err.u keeps
		 * its zero initialisation.
		 */
		switch (err_sub_type) {
		case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
			mce_err.u.ra_error_type =
				MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
			break;
		case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
			mce_err.u.ra_error_type =
				MCE_RA_ERROR_LOAD_STORE_FOREIGN;
			break;
		}
		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);
		break;
	case MC_ERROR_TYPE_UNKNOWN:
	default:
		mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
		break;
	}
out:
	/* The second argument tells save_mce_event() whether we fully recovered */
	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
		       &mce_err, regs->nip, eaddr, paddr);
	return disposition;
}
737
738static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
739{
740	struct pseries_errorlog *pseries_log;
741	struct pseries_mc_errorlog *mce_log = NULL;
742	int disposition = rtas_error_disposition(errp);
743	u8 error_type;
744
745	if (!rtas_error_extended(errp))
746		goto out;
747
748	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
749	if (!pseries_log)
750		goto out;
751
752	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
753	error_type = mce_log->error_type;
754
755	disposition = mce_handle_err_realmode(disposition, error_type);
756out:
757	disposition = mce_handle_err_virtmode(regs, errp, mce_log,
758					      disposition);
759	return disposition;
760}
761
762/*
763 * Process MCE rtas errlog event.
764 */
765void pSeries_machine_check_log_err(void)
766{
767	struct rtas_error_log *err;
768
769	err = fwnmi_get_errlog();
770	log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
771}
772
773/*
774 * See if we can recover from a machine check exception.
775 * This is only called on power4 (or above) and only via
776 * the Firmware Non-Maskable Interrupts (fwnmi) handler
777 * which provides the error analysis for us.
778 *
779 * Return 1 if corrected (or delivered a signal).
780 * Return 0 if there is nothing we can do.
781 */
782static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
783{
784	int recovered = 0;
785
786	if (regs_is_unrecoverable(regs)) {
787		/* If MSR_RI isn't set, we cannot recover */
788		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
789		recovered = 0;
790	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
791		/* Platform corrected itself */
792		recovered = 1;
793	} else if (evt->severity == MCE_SEV_FATAL) {
794		/* Fatal machine check */
795		pr_err("Machine check interrupt is fatal\n");
796		recovered = 0;
797	}
798
799	if (!recovered && evt->sync_error) {
800		/*
801		 * Try to kill processes if we get a synchronous machine check
802		 * (e.g., one caused by execution of this instruction). This
803		 * will devolve into a panic if we try to kill init or are in
804		 * an interrupt etc.
805		 *
806		 * TODO: Queue up this address for hwpoisioning later.
807		 * TODO: This is not quite right for d-side machine
808		 *       checks ->nip is not necessarily the important
809		 *       address.
810		 */
811		if ((user_mode(regs))) {
812			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
813			recovered = 1;
814		} else if (die_will_crash()) {
815			/*
816			 * die() would kill the kernel, so better to go via
817			 * the platform reboot code that will log the
818			 * machine check.
819			 */
820			recovered = 0;
821		} else {
822			die_mce("Machine check", regs, SIGBUS);
823			recovered = 1;
824		}
825	}
826
827	return recovered;
828}
829
830/*
831 * Handle a machine check.
832 *
833 * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
834 * should be present.  If so the handler which called us tells us if the
835 * error was recovered (never true if RI=0).
836 *
837 * On hardware prior to Power 4 these exceptions were asynchronous which
838 * means we can't tell exactly where it occurred and so we can't recover.
839 */
840int pSeries_machine_check_exception(struct pt_regs *regs)
841{
842	struct machine_check_event evt;
843
844	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
845		return 0;
846
847	/* Print things out */
848	if (evt.version != MCE_V1) {
849		pr_err("Machine Check Exception, Unknown event version %d !\n",
850		       evt.version);
851		return 0;
852	}
853	machine_check_print_event_info(&evt, user_mode(regs), false);
854
855	if (recover_mce(regs, &evt))
856		return 1;
857
858	return 0;
859}
860
861long pseries_machine_check_realmode(struct pt_regs *regs)
862{
863	struct rtas_error_log *errp;
864	int disposition;
865
866	if (fwnmi_active) {
867		errp = fwnmi_get_errinfo(regs);
868		/*
869		 * Call to fwnmi_release_errinfo() in real mode causes kernel
870		 * to panic. Hence we will call it as soon as we go into
871		 * virtual mode.
872		 */
873		disposition = mce_handle_error(regs, errp);
874
875		fwnmi_release_errinfo();
876
877		if (disposition == RTAS_DISP_FULLY_RECOVERED)
878			return 1;
879	}
880
881	return 0;
882}
883