/*
 * P4 specific Machine Check Exception Reporting
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/smp.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include <asm/therm_throt.h>

#include "mce.h"

/* as supported by the P4/Xeon family */
struct intel_mce_extended_msrs {
	u32 eax;
	u32 ebx;
	u32 ecx;
	u32 edx;
	u32 esi;
	u32 edi;
	u32 ebp;
	u32 esp;
	u32 eflags;
	u32 eip;
	/* u32 *reserved[]; */
};

static int mce_num_extended_msrs = 0;


#ifdef CONFIG_X86_MCE_P4THERMAL
static void unexpected_thermal_interrupt(struct pt_regs *regs)
{
	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
			smp_processor_id());
	add_taint(TAINT_MACHINE_CHECK);
}

/* P4/Xeon Thermal transition interrupt handler */
static void intel_thermal_interrupt(struct pt_regs *regs)
{
	__u64 msr_val;

	ack_APIC_irq();

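	/*
	 * MSR_IA32_THERM_STATUS bit 0 is the current thermal trip status;
	 * pass it on so throttling transitions can be accounted and logged.
	 */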
	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
	therm_throt_process(msr_val & 0x1);
}

/* Thermal interrupt handler for this CPU setup */
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;

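/*
 * Until intel_init_thermal() has verified thermal support and installed
 * intel_thermal_interrupt, any thermal LVT interrupt is treated as
 * unexpected.
 */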
fastcall void smp_thermal_interrupt(struct pt_regs *regs)
{
	irq_enter();
	vendor_thermal_interrupt(regs);
	irq_exit();
}

/* P4/Xeon Thermal regulation detect and init */
static void intel_init_thermal(struct cpuinfo_x86 *c)
{
	u32 l, h;
	unsigned int cpu = smp_processor_id();

	/* Thermal monitoring */
	if (!cpu_has(c, X86_FEATURE_ACPI))
		return;	/* -ENODEV */

	/* Clock modulation */
	if (!cpu_has(c, X86_FEATURE_ACC))
		return;	/* -ENODEV */

	/* First check if it's enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already -zwanem.
	 */
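	/*
	 * MSR_IA32_MISC_ENABLE bit 3 is the automatic thermal control
	 * circuit (TM1) enable.  If it is already set and the thermal LVT
	 * entry is programmed for SMI delivery, firmware owns thermal
	 * handling and we keep our hands off.
	 */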
	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
	h = apic_read(APIC_LVTTHMR);
	if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
				cpu);
		return; /* -EBUSY */
	}

	/* check whether a vector already exists, temporarily masked? */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
				"installed\n",
			cpu, (h & APIC_VECTOR_MASK));
		return; /* -EBUSY */
	}

	/* The temperature transition interrupt handler setup */
	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
	apic_write_around(APIC_LVTTHMR, h);

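	/*
	 * MSR_IA32_THERM_INTERRUPT bits 0 and 1 enable the high- and
	 * low-temperature threshold interrupts respectively, which is
	 * what the 0x03 mask below turns on.
	 */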
	rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
	wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03, h);

	/* ok we're good to go... */
	vendor_thermal_interrupt = intel_thermal_interrupt;

	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);

	l = apic_read (APIC_LVTTHMR);
	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
	return;
}
#endif /* CONFIG_X86_MCE_P4THERMAL */


/* P4/Xeon Extended MCE MSR retrieval; only the low 32 bits of each MSR
 * are meaningful here, so the high halves are read and discarded. */
static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{
	u32 h;

	rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
	rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
	rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
	rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
	rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
	rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
	rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
	rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
	rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
	rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
}

static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
{
	int recover = 1;
	u32 alow, ahigh, high, low;
	u32 mcgstl, mcgsth;
	int i;

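	/*
	 * MCG_STATUS bit 0 (RIPV) set means the interrupted context can be
	 * restarted, i.e. the error is potentially recoverable.
	 */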
	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
	if (mcgstl & (1<<0))	/* Recoverable ? */
		recover = 0;

	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
		smp_processor_id(), mcgsth, mcgstl);

	if (mce_num_extended_msrs > 0) {
		struct intel_mce_extended_msrs dbg;
		intel_get_extended_msrs(&dbg);
		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
			smp_processor_id(), dbg.eip, dbg.eflags);
		printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
			dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
		printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
			dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
	}

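	/*
	 * In the high word of each MCi_STATUS MSR: bit 31 = VAL (entry
	 * valid), bit 29 = UC (uncorrected error), bit 27 = MISCV and
	 * bit 26 = ADDRV (MCi_MISC/MCi_ADDR hold valid data), and
	 * bit 25 = PCC (processor context corrupt).
	 */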
	for (i = 0; i < nr_mce_banks; i++) {
		rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
		if (high & (1<<31)) {
			if (high & (1<<29))
				recover |= 1;
			if (high & (1<<25))
				recover |= 2;
			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
			high &= ~(1<<31);
			if (high & (1<<27)) {
				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
				printk ("[%08x%08x]", ahigh, alow);
			}
			if (high & (1<<26)) {
				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
				printk (" at %08x%08x", ahigh, alow);
			}
			printk ("\n");
		}
	}

	if (recover & 2)
		panic ("CPU context corrupt");
	if (recover & 1)
		panic ("Unable to continue");

	printk(KERN_EMERG "Attempting to continue.\n");
	/*
	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
	 * recoverable/continuable. This will allow BIOS to look at the MSRs
	 * for errors if the OS could not log the error.
	 */
	for (i = 0; i < nr_mce_banks; i++) {
		u32 msr;
		msr = MSR_IA32_MC0_STATUS+i*4;
		rdmsr (msr, low, high);
		if (high & (1<<31)) {
			/* Clear it */
			wrmsr(msr, 0UL, 0UL);
			/* Serialize */
			wmb();
			add_taint(TAINT_MACHINE_CHECK);
		}
	}
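	/*
	 * Clear MCIP (MCG_STATUS bit 2) to re-arm machine check delivery;
	 * a second machine check while MCIP is set shuts the processor down.
	 */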
	mcgstl &= ~(1<<2);
	wrmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}


void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
	u32 l, h;
	int i;

	machine_check_vector = intel_machine_check;
	wmb();

	printk (KERN_INFO "Intel machine check architecture supported.\n");
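	/*
	 * MCG_CAP layout: bits 7:0 = number of reporting banks,
	 * bit 8 = MCG_CTL register present, bit 9 = extended MSRs present,
	 * bits 23:16 = number of extended MSRs.
	 */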
	rdmsr (MSR_IA32_MCG_CAP, l, h);
	if (l & (1<<8))	/* Control register present ? */
		wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
	nr_mce_banks = l & 0xff;

	for (i = 0; i < nr_mce_banks; i++) {
		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
	}

	set_in_cr4 (X86_CR4_MCE);
	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
		smp_processor_id());

	/* Check for P4/Xeon extended MCE MSRs */
	rdmsr (MSR_IA32_MCG_CAP, l, h);
	if (l & (1<<9)) {	/* MCG_EXT_P */
		mce_num_extended_msrs = (l >> 16) & 0xff;
		printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
				" available\n",
			smp_processor_id(), mce_num_extended_msrs);

#ifdef CONFIG_X86_MCE_P4THERMAL
		/* Check for P4/Xeon Thermal monitor */
		intel_init_thermal(c);
#endif
	}
}