1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Common corrected MCE threshold handler code: 4 */ 5#include <linux/interrupt.h> 6#include <linux/kernel.h> 7 8#include <asm/irq_vectors.h> 9#include <asm/traps.h> 10#include <asm/apic.h> 11#include <asm/mce.h> 12#include <asm/trace/irq_vectors.h> 13 14#include "internal.h" 15 16static void default_threshold_interrupt(void) 17{ 18 pr_err("Unexpected threshold interrupt at vector %x\n", 19 THRESHOLD_APIC_VECTOR); 20} 21 22void (*mce_threshold_vector)(void) = default_threshold_interrupt; 23 24DEFINE_IDTENTRY_SYSVEC(sysvec_threshold) 25{ 26 trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); 27 inc_irq_stat(irq_threshold_count); 28 mce_threshold_vector(); 29 trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); 30 apic_eoi(); 31} 32 33DEFINE_PER_CPU(struct mca_storm_desc, storm_desc); 34 35void mce_inherit_storm(unsigned int bank) 36{ 37 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 38 39 /* 40 * Previous CPU owning this bank had put it into storm mode, 41 * but the precise history of that storm is unknown. Assume 42 * the worst (all recent polls of the bank found a valid error 43 * logged). This will avoid the new owner prematurely declaring 44 * the storm has ended. 45 */ 46 storm->banks[bank].history = ~0ull; 47 storm->banks[bank].timestamp = jiffies; 48} 49 50bool mce_get_storm_mode(void) 51{ 52 return __this_cpu_read(storm_desc.poll_mode); 53} 54 55void mce_set_storm_mode(bool storm) 56{ 57 __this_cpu_write(storm_desc.poll_mode, storm); 58} 59 60static void mce_handle_storm(unsigned int bank, bool on) 61{ 62 switch (boot_cpu_data.x86_vendor) { 63 case X86_VENDOR_INTEL: 64 mce_intel_handle_storm(bank, on); 65 break; 66 } 67} 68 69void cmci_storm_begin(unsigned int bank) 70{ 71 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 72 73 __set_bit(bank, this_cpu_ptr(mce_poll_banks)); 74 storm->banks[bank].in_storm_mode = true; 75 76 /* 77 * If this is the first bank on this CPU to enter storm mode 78 * start polling. 79 */ 80 if (++storm->stormy_bank_count == 1) 81 mce_timer_kick(true); 82} 83 84void cmci_storm_end(unsigned int bank) 85{ 86 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 87 88 __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); 89 storm->banks[bank].history = 0; 90 storm->banks[bank].in_storm_mode = false; 91 92 /* If no banks left in storm mode, stop polling. */ 93 if (!this_cpu_dec_return(storm_desc.stormy_bank_count)) 94 mce_timer_kick(false); 95} 96 97void mce_track_storm(struct mce *mce) 98{ 99 struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); 100 unsigned long now = jiffies, delta; 101 unsigned int shift = 1; 102 u64 history = 0; 103 104 /* No tracking needed for banks that do not support CMCI */ 105 if (storm->banks[mce->bank].poll_only) 106 return; 107 108 /* 109 * When a bank is in storm mode it is polled once per second and 110 * the history mask will record about the last minute of poll results. 111 * If it is not in storm mode, then the bank is only checked when 112 * there is a CMCI interrupt. Check how long it has been since 113 * this bank was last checked, and adjust the amount of "shift" 114 * to apply to history. 115 */ 116 if (!storm->banks[mce->bank].in_storm_mode) { 117 delta = now - storm->banks[mce->bank].timestamp; 118 shift = (delta + HZ) / HZ; 119 } 120 121 /* If it has been a long time since the last poll, clear history. */ 122 if (shift < NUM_HISTORY_BITS) 123 history = storm->banks[mce->bank].history << shift; 124 125 storm->banks[mce->bank].timestamp = now; 126 127 /* History keeps track of corrected errors. VAL=1 && UC=0 */ 128 if ((mce->status & MCI_STATUS_VAL) && mce_is_correctable(mce)) 129 history |= 1; 130 131 storm->banks[mce->bank].history = history; 132 133 if (storm->banks[mce->bank].in_storm_mode) { 134 if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD, 0)) 135 return; 136 printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce->bank); 137 mce_handle_storm(mce->bank, false); 138 cmci_storm_end(mce->bank); 139 } else { 140 if (hweight64(history) < STORM_BEGIN_THRESHOLD) 141 return; 142 printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce->bank); 143 mce_handle_storm(mce->bank, true); 144 cmci_storm_begin(mce->bank); 145 } 146} 147