/*
 * local apic based NMI watchdog for various CPUs.
 *
 * This file also handles reservation of performance counters for coordination
 * with other users (like oprofile).
 *
 * Note that these events normally don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 *
 * Original code for K7/P6 written by Keith Owens
 *
 */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/kprobes.h>

#include <asm/apic.h>
#include <asm/perf_event.h>

struct nmi_watchdog_ctlblk {
	unsigned int cccr_msr;
	unsigned int perfctr_msr;	/* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;	/* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
	int (*reserve)(void);
	void (*unreserve)(void);
	int (*setup)(unsigned nmi_hz);
	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
	void (*stop)(void);
	unsigned perfctr;
	unsigned evntsel;
	u64 checkbit;
};

static const struct wd_ops *wd_ops;

/*
 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.
 *
 * It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters/event selection registers may be reserved
 * by different subsystems; this reservation system just tries to coordinate
 * things a little.
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return msr - MSR_K7_PERFCTR0;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return msr - MSR_ARCH_PERFMON_PERFCTR0;

		switch (boot_cpu_data.x86) {
		case 6:
			return msr - MSR_P6_PERFCTR0;
		case 15:
			return msr - MSR_P4_BPU_PERFCTR0;
		}
	}
	return 0;
}

/*
 * converts an msr to an appropriate reservation bit
 * returns the bit offset of the event selection register
 */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return msr - MSR_K7_EVNTSEL0;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return msr - MSR_ARCH_PERFMON_EVENTSEL0;

		switch (boot_cpu_data.x86) {
		case 6:
			return msr - MSR_P6_EVNTSEL0;
		case 15:
			return msr - MSR_P4_BSU_ESCR0;
		}
	}
	return 0;
}

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter >= NMI_MAX_COUNTER_BITS);

	return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
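
/*
 * Example of the intended reservation protocol: a co-user such as oprofile
 * is expected to grab both the counter and its matching event selection
 * MSR before programming them, and to back out on partial failure (the
 * reserve functions return 1 on success and 0 on failure), roughly:
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return 0;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return 0;
 *	}
 *
 * single_msr_reserve() below follows exactly this pattern.
 */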

int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter >= NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, perfctr_nmi_owner))
		return 1;
	return 0;
}
EXPORT_SYMBOL(reserve_perfctr_nmi);

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter >= NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(release_perfctr_nmi);

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter >= NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, evntsel_nmi_owner))
		return 1;
	return 0;
}
EXPORT_SYMBOL(reserve_evntsel_nmi);

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter >= NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, evntsel_nmi_owner);
}
EXPORT_SYMBOL(release_evntsel_nmi);

void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);

	if (wd_ops)
		wd_ops->unreserve();

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled? */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware? */
	if (!wd_ops)
		return;
	if (!wd_ops->reserve()) {
		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
		return;
	}

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
	touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31 bit values; bit 31
	 * must be 1 so that bits 32.. sign extend to 1.
	 * Find the appropriate nmi_hz.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;

		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}

static void write_watchdog_counter(unsigned int perfctr_msr,
				   const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		pr_debug("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
				     const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		pr_debug("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}
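
/*
 * Worked example for the helpers above, assuming a hypothetical 3 GHz
 * part (cpu_khz == 3000000) and nmi_hz == 1:
 *
 * - the full period would be 3,000,000,000 cycles, which does not fit
 *   into the 31 usable value bits of a P6/ARCH_PERFMON counter, so
 *   adjust_for_32bit_ctr() raises nmi_hz to
 *   3000000000 / 0x7fffffff + 1 == 2;
 * - write_watchdog_counter32() then programs -1,500,000,000, and the
 *   counter counts up and overflows (raising the NMI) after
 *   1,500,000,000 unhalted cycles, i.e. twice per second.
 *
 * Full-width counters (AMD, P4) skip the adjustment and take
 * write_watchdog_counter() instead.
 */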

/*
 * AMD K7/K8/Family10h/Family11h support.
 * AMD keeps this interface nicely stable, so there is not much variety.
 */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}

static void single_msr_stop_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
	if (!reserve_perfctr_nmi(wd_ops->perfctr))
		return 0;

	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
		release_perfctr_nmi(wd_ops->perfctr);
		return 0;
	}
	return 1;
}

static void single_msr_unreserve(void)
{
	release_evntsel_nmi(wd_ops->evntsel);
	release_perfctr_nmi(wd_ops->perfctr);
}

static void __kprobes
single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_k7_watchdog,
	.rearm		= single_msr_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_K7_PERFCTR0,
	.evntsel	= MSR_K7_EVNTSEL0,
	.checkbit	= 1ULL << 47,
};
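
/*
 * Note on .checkbit: AMD performance counters are 48 bits wide. Because
 * the watchdog programs a negative start value, bit 47 stays set until
 * the counter overflows; lapic_wd_event() tests this bit to tell whether
 * this CPU's counter has fired yet.
 */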

/*
 * Intel Model 6 (PPro+, P2, P3, P-M, Core1)
 */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	/* KVM doesn't implement this MSR */
	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
		return 0;

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}

static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/*
	 * P6 based Pentium M needs to re-unmask
	 * the apic vector, but it doesn't hurt
	 * other P6 variants.
	 * ArchPerfmon/Core Duo also needs this.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	/* P6/ARCH_PERFMON only has 32 bit counter writes */
	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_p6_watchdog,
	.rearm		= p6_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_P6_PERFCTR0,
	.evntsel	= MSR_P6_EVNTSEL0,
	.checkbit	= 1ULL << 39,
};

/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
#define P4_ESCR_OS		(1 << 3)
#define P4_ESCR_USR		(1 << 2)
#define P4_CCCR_OVF_PMI0	(1 << 26)
#define P4_CCCR_OVF_PMI1	(1 << 27)
#define P4_CCCR_THRESHOLD(N)	((N) << 20)
#define P4_CCCR_COMPLEMENT	(1 << 19)
#define P4_CCCR_COMPARE		(1 << 18)
#define P4_CCCR_REQUIRED	(3 << 16)
#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
#define P4_CCCR_ENABLE		(1 << 12)
#define P4_CCCR_OVF		(1 << 31)

#define P4_CONTROLS 18
static unsigned int p4_controls[18] = {
	MSR_P4_BPU_CCCR0,
	MSR_P4_BPU_CCCR1,
	MSR_P4_BPU_CCCR2,
	MSR_P4_BPU_CCCR3,
	MSR_P4_MS_CCCR0,
	MSR_P4_MS_CCCR1,
	MSR_P4_MS_CCCR2,
	MSR_P4_MS_CCCR3,
	MSR_P4_FLAME_CCCR0,
	MSR_P4_FLAME_CCCR1,
	MSR_P4_FLAME_CCCR2,
	MSR_P4_FLAME_CCCR3,
	MSR_P4_IQ_CCCR0,
	MSR_P4_IQ_CCCR1,
	MSR_P4_IQ_CCCR2,
	MSR_P4_IQ_CCCR3,
	MSR_P4_IQ_CCCR4,
	MSR_P4_IQ_CCCR5,
};

/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
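/*
 * How the clock trick works: with P4_CCCR_COMPARE set, the counter only
 * advances on cycles where the threshold test passes. P4_CCCR_COMPLEMENT
 * inverts that test to "events this cycle <= 15", which is true on every
 * cycle, so IQ_COUNTER0 increments once per cycle no matter which event
 * CRU_ESCR0 actually selects.
 */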
static int setup_p4_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/*
	 * performance counters are shared resources;
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);

		/*
		 * If we're on the kdump kernel (or in a similar situation),
		 * we may still have other performance counter registers set
		 * to interrupt, and they'll keep interrupting forever because
		 * of the P4_CCCR_OVF quirk. So we need to ACK all the
		 * pending interrupts and disable all the registers here,
		 * before reenabling the NMI delivery. Refer to p4_rearm()
		 * about the P4_CCCR_OVF quirk.
		 */
		if (reset_devices) {
			unsigned int low, high;
			int i;

			for (i = 0; i < P4_CONTROLS; i++) {
				rdmsr(p4_controls[i], low, high);
				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
				wrmsr(p4_controls[i], low, high);
			}
		}
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;

		/* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
		if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
			cccr_val = P4_CCCR_OVF_PMI0;
		else
			cccr_val = P4_CCCR_OVF_PMI1;
		cccr_val |= P4_CCCR_ESCR_SELECT(4);
	}

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		| P4_CCCR_COMPLEMENT
		| P4_CCCR_COMPARE
		| P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	return 1;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
		return 0;
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
		goto fail1;
#endif
	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
		goto fail2;
	/* RED-PEN why is ESCR1 not reserved here? */
	return 1;
 fail2:
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 fail1:
#endif
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
	return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	unsigned dummy;

	/*
	 * P4 quirks:
	 * - An overflown perfctr will assert its interrupt
	 *   until the OVF flag in its CCCR is cleared.
	 * - LVTPC is masked on interrupt and must be
	 *   unmasked by the LVTPC handler.
	 */
	rdmsrl(wd->cccr_msr, dummy);
	dummy &= ~P4_CCCR_OVF;
	wrmsrl(wd->cccr_msr, dummy);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
	.reserve	= p4_reserve,
	.unreserve	= p4_unreserve,
	.setup		= setup_p4_watchdog,
	.rearm		= p4_rearm,
	.stop		= stop_p4_watchdog,
	/* RED-PEN this is wrong for the other sibling */
	.perfctr	= MSR_P4_BPU_PERFCTR0,
	.evntsel	= MSR_P4_BSU_ESCR0,
	.checkbit	= 1ULL << 39,
};
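
/*
 * Note on .checkbit: the P4 (and P6) counters are 40 bits wide, so bit 39
 * plays the same "counter still running" role here that bit 47 plays in
 * k7_wd_ops above.
 */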

/*
 * Watchdog using the Intel architected PerfMon.
 * Used for Core2 and hopefully all future Intel CPUs.
 */
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * the Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length <
	     (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX + 1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return 0;

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);
	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
	return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_intel_arch_watchdog,
	.rearm		= p6_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_ARCH_PERFMON_PERFCTR1,
	.evntsel	= MSR_ARCH_PERFMON_EVENTSEL1,
};

static void probe_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
		    boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
			return;
		wd_ops = &k7_wd_ops;
		break;
	case X86_VENDOR_INTEL:
		/*
		 * These steppings (Core Duo/Solo and early Core2) have a
		 * perfctr1 without a working enable bit, so fall back to
		 * counter/event-select 0 for them.
		 */
		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
		    (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
		     boot_cpu_data.x86_mask == 4)) {
			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
		}
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			wd_ops = &intel_arch_wd_ops;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 13)
				return;

			wd_ops = &p6_wd_ops;
			break;
		case 15:
			wd_ops = &p4_wd_ops;
			break;
		default:
			return;
		}
		break;
	}
}

/* Interface to nmi.c */
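
/*
 * Rough sketch of the expected calling sequence (the actual caller lives
 * in nmi.c):
 *
 *	per CPU, at enable time:
 *		lapic_watchdog_init(nmi_hz);
 *	from the NMI handler, on each watchdog NMI:
 *		lapic_wd_event(nmi_hz);
 *			returns 1 if this CPU's counter had overflowed
 *			and was rearmed, 0 if the NMI was someone else's
 *	per CPU, at disable time:
 *		lapic_watchdog_stop();
 */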

int lapic_watchdog_init(unsigned nmi_hz)
{
	if (!wd_ops) {
		probe_nmi_watchdog();
		if (!wd_ops) {
			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
			return -1;
		}

		if (!wd_ops->reserve()) {
			printk(KERN_ERR
				"NMI watchdog: cannot reserve perfctrs\n");
			return -1;
		}
	}

	if (!(wd_ops->setup(nmi_hz))) {
		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
		       raw_smp_processor_id());
		return -1;
	}

	return 0;
}

void lapic_watchdog_stop(void)
{
	if (wd_ops)
		wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
		hz = adjust_for_32bit_ctr(hz);
	return hz;
}

int __kprobes lapic_wd_event(unsigned nmi_hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 ctr;

	rdmsrl(wd->perfctr_msr, ctr);
	if (ctr & wd_ops->checkbit)	/* perfctr still running? */
		return 0;

	wd_ops->rearm(wd, nmi_hz);
	return 1;
}