/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Jason Yeh <jason.yeh@amd.com>
 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include "op_counter.h"
#include "op_x86_model.h"

static struct op_x86_model_spec *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);

/* must be protected with get_online_cpus()/put_online_cpus(): */
static int nmi_enabled;
static int ctr_running;

struct op_counter_config counter_config[OP_MAX_COUNTER];

/* common functions */

/*
 * Build the event-select (control) MSR value for one counter: event
 * select in bits 0-7 (bits 8-11 of a wide event number go to MSR
 * bits 32-35), unit mask in bits 8-15, plus the INT, USR and OS
 * enable bits.  Models without an event_mask are limited to 8-bit
 * event numbers.
 */
u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
		    struct op_counter_config *counter_config)
{
	u64 val = 0;
	u16 event = (u16)counter_config->event;

	val |= ARCH_PERFMON_EVENTSEL_INT;
	val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
	val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
	val |= (counter_config->unit_mask & 0xFF) << 8;
	event &= model->event_mask ? model->event_mask : 0xFF;
	val |= event & 0xFF;
	val |= (event & 0x0F00) << 24;

	return val;
}

static int profile_exceptions_notify(struct notifier_block *self,
				     unsigned long val, void *data)
{
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;

	switch (val) {
	case DIE_NMI:
	case DIE_NMI_IPI:
		if (ctr_running)
			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
		else if (!nmi_enabled)
			break;
		else
			model->stop(&__get_cpu_var(cpu_msrs));
		ret = NOTIFY_STOP;
		break;
	default:
		break;
	}
	return ret;
}

static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
	unsigned int i;

	for (i = 0; i < model->num_counters; ++i) {
		if (counters[i].addr)
			rdmsrl(counters[i].addr, counters[i].saved);
	}

	for (i = 0; i < model->num_controls; ++i) {
		if (controls[i].addr)
			rdmsrl(controls[i].addr, controls[i].saved);
	}
}

static void nmi_cpu_start(void *dummy)
{
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
	if (!msrs->controls)
		WARN_ON_ONCE(1);
	else
		model->start(msrs);
}

static int nmi_start(void)
{
	get_online_cpus();
	on_each_cpu(nmi_cpu_start, NULL, 1);
	ctr_running = 1;
	put_online_cpus();
	return 0;
}

static void nmi_cpu_stop(void *dummy)
{
	struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
	if (!msrs->controls)
		WARN_ON_ONCE(1);
	else
		model->stop(msrs);
}

static void nmi_stop(void)
{
	get_online_cpus();
	on_each_cpu(nmi_cpu_stop, NULL, 1);
	ctr_running = 0;
	put_online_cpus();
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

static DEFINE_PER_CPU(int, switch_index);

static inline int has_mux(void)
{
	return !!model->switch_ctrl;
}

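/*
 * Mapping between physical counters (the hardware MSRs) and virtual
 * counters (the user-visible slots) while multiplexing: the per-CPU
 * switch_index is the base of the virtual counter set currently
 * loaded into the hardware.  Worked example with hypothetical sizes
 * (not taken from any particular model): with 4 physical and 8
 * virtual counters, switch_index toggles between 0 and 4, so
 * physical counter 1 backs virtual counter 1 in one time slice and
 * virtual counter 5 in the next.
 */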
inline int op_x86_phys_to_virt(int phys)
{
	return __get_cpu_var(switch_index) + phys;
}

inline int op_x86_virt_to_phys(int virt)
{
	return virt % model->num_counters;
}

static void nmi_shutdown_mux(void)
{
	int i;

	if (!has_mux())
		return;

	for_each_possible_cpu(i) {
		kfree(per_cpu(cpu_msrs, i).multiplex);
		per_cpu(cpu_msrs, i).multiplex = NULL;
		per_cpu(switch_index, i) = 0;
	}
}

static int nmi_setup_mux(void)
{
	size_t multiplex_size =
		sizeof(struct op_msr) * model->num_virt_counters;
	int i;

	if (!has_mux())
		return 1;

	for_each_possible_cpu(i) {
		per_cpu(cpu_msrs, i).multiplex =
			kzalloc(multiplex_size, GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).multiplex)
			return 0;
	}

	return 1;
}

static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
{
	int i;
	struct op_msr *multiplex = msrs->multiplex;

	if (!has_mux())
		return;

	for (i = 0; i < model->num_virt_counters; ++i) {
		if (counter_config[i].enabled) {
			multiplex[i].saved = -(u64)counter_config[i].count;
		} else {
			multiplex[i].saved = 0;
		}
	}

	per_cpu(switch_index, cpu) = 0;
}

static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
{
	struct op_msr *counters = msrs->counters;
	struct op_msr *multiplex = msrs->multiplex;
	int i;

	for (i = 0; i < model->num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (counters[i].addr)
			rdmsrl(counters[i].addr, multiplex[virt].saved);
	}
}

static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
{
	struct op_msr *counters = msrs->counters;
	struct op_msr *multiplex = msrs->multiplex;
	int i;

	for (i = 0; i < model->num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (counters[i].addr)
			wrmsrl(counters[i].addr, multiplex[virt].saved);
	}
}

static void nmi_cpu_switch(void *dummy)
{
	int cpu = smp_processor_id();
	int si = per_cpu(switch_index, cpu);
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);

	nmi_cpu_stop(NULL);
	nmi_cpu_save_mpx_registers(msrs);

	/* move to next set */
	si += model->num_counters;
	if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
		per_cpu(switch_index, cpu) = 0;
	else
		per_cpu(switch_index, cpu) = si;

	model->switch_ctrl(model, msrs);
	nmi_cpu_restore_mpx_registers(msrs);

	nmi_cpu_start(NULL);
}

/*
 * Quick check to see if multiplexing is necessary.
 * The check should be sufficient since counters are used
 * in order.
 */
static int nmi_multiplex_on(void)
{
	return counter_config[model->num_counters].count ? 0 : -EINVAL;
}

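/*
 * Switch every CPU to its next virtual counter set.  This is hooked
 * up as ops->switch_events in mux_init() below; the generic oprofile
 * core is assumed to call it periodically, once per multiplexing
 * time slice, while profiling is running.
 */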
static int nmi_switch_event(void)
{
	if (!has_mux())
		return -ENOSYS;		/* not implemented */
	if (nmi_multiplex_on() < 0)
		return -EINVAL;		/* not necessary */

	get_online_cpus();
	if (ctr_running)
		on_each_cpu(nmi_cpu_switch, NULL, 1);
	put_online_cpus();

	return 0;
}

static inline void mux_init(struct oprofile_operations *ops)
{
	if (has_mux())
		ops->switch_events = nmi_switch_event;
}

static void mux_clone(int cpu)
{
	if (!has_mux())
		return;

	memcpy(per_cpu(cpu_msrs, cpu).multiplex,
	       per_cpu(cpu_msrs, 0).multiplex,
	       sizeof(struct op_msr) * model->num_virt_counters);
}

#else

inline int op_x86_phys_to_virt(int phys) { return phys; }
inline int op_x86_virt_to_phys(int virt) { return virt; }
static inline void nmi_shutdown_mux(void) { }
static inline int nmi_setup_mux(void) { return 1; }
static inline void
nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { }
static inline void mux_init(struct oprofile_operations *ops) { }
static void mux_clone(int cpu) { }

#endif

static void free_msrs(void)
{
	int i;
	for_each_possible_cpu(i) {
		kfree(per_cpu(cpu_msrs, i).counters);
		per_cpu(cpu_msrs, i).counters = NULL;
		kfree(per_cpu(cpu_msrs, i).controls);
		per_cpu(cpu_msrs, i).controls = NULL;
	}
	nmi_shutdown_mux();
}

static int allocate_msrs(void)
{
	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
	size_t counters_size = sizeof(struct op_msr) * model->num_counters;

	int i;
	for_each_possible_cpu(i) {
		per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
							GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).counters)
			goto fail;
		per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
							GFP_KERNEL);
		if (!per_cpu(cpu_msrs, i).controls)
			goto fail;
	}

	if (!nmi_setup_mux())
		goto fail;

	return 1;

fail:
	free_msrs();
	return 0;
}

static void nmi_cpu_setup(void *dummy)
{
	int cpu = smp_processor_id();
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
	nmi_cpu_save_registers(msrs);
	spin_lock(&oprofilefs_lock);
	model->setup_ctrs(model, msrs);
	nmi_cpu_setup_mux(cpu, msrs);
	spin_unlock(&oprofilefs_lock);
	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}

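/*
 * The profiling NMI handler runs on the die-notifier chain with a
 * priority above the default of 0, so it is consulted ahead of
 * lower-priority NMI users; NMIs it recognizes are consumed via
 * NOTIFY_STOP and not passed further down the chain.
 */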
static struct notifier_block profile_exceptions_nb = {
	.notifier_call = profile_exceptions_notify,
	.next = NULL,
	.priority = 2
};

static void nmi_cpu_restore_registers(struct op_msrs *msrs)
{
	struct op_msr *counters = msrs->counters;
	struct op_msr *controls = msrs->controls;
	unsigned int i;

	for (i = 0; i < model->num_controls; ++i) {
		if (controls[i].addr)
			wrmsrl(controls[i].addr, controls[i].saved);
	}

	for (i = 0; i < model->num_counters; ++i) {
		if (counters[i].addr)
			wrmsrl(counters[i].addr, counters[i].saved);
	}
}

static void nmi_cpu_shutdown(void *dummy)
{
	unsigned int v;
	int cpu = smp_processor_id();
	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);

	/* restoring APIC_LVTPC can trigger an APIC error because the
	 * delivery mode and vector number combination can be illegal.
	 * That's by design: on power-on the APIC LVT entries contain a
	 * zero vector number, which is legal only for NMI delivery
	 * mode.  So inhibit APIC errors before restoring the LVTPC.
	 */
	v = apic_read(APIC_LVTERR);
	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
	apic_write(APIC_LVTERR, v);
	nmi_cpu_restore_registers(msrs);
	if (model->cpu_down)
		model->cpu_down();
}

static void nmi_cpu_up(void *dummy)
{
	if (nmi_enabled)
		nmi_cpu_setup(dummy);
	if (ctr_running)
		nmi_cpu_start(dummy);
}

static void nmi_cpu_down(void *dummy)
{
	if (ctr_running)
		nmi_cpu_stop(dummy);
	if (nmi_enabled)
		nmi_cpu_shutdown(dummy);
}

static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
	unsigned int i;

	for (i = 0; i < model->num_virt_counters; ++i) {
		struct dentry *dir;
		char buf[4];

		/* quick little hack to _not_ expose a counter if it is not
		 * available for use.  This should protect the userspace app.
		 * NOTE: assumes 1:1 mapping here (that counters are organized
		 * sequentially in their struct assignment).
		 */
		if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i)))
			continue;

		snprintf(buf, sizeof(buf), "%d", i);
		dir = oprofilefs_mkdir(sb, root, buf);
		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
	}

	return 0;
}

static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
				 void *data)
{
	int cpu = (unsigned long)data;
	switch (action) {
	case CPU_DOWN_FAILED:
	case CPU_ONLINE:
		smp_call_function_single(cpu, nmi_cpu_up, NULL, 0);
		break;
	case CPU_DOWN_PREPARE:
		smp_call_function_single(cpu, nmi_cpu_down, NULL, 1);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block oprofile_cpu_nb = {
	.notifier_call = oprofile_cpu_notifier
};

static int nmi_setup(void)
{
	int err = 0;
	int cpu;

	if (!allocate_msrs())
		return -ENOMEM;

	/* We need to serialize save and setup for HT because the subsets
	 * of MSRs are distinct for the save and setup operations.
	 */

	/* Assume saved/restored counters are the same on all CPUs */
	err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
	if (err)
		goto fail;

	for_each_possible_cpu(cpu) {
		if (!cpu)
			continue;

		memcpy(per_cpu(cpu_msrs, cpu).counters,
		       per_cpu(cpu_msrs, 0).counters,
		       sizeof(struct op_msr) * model->num_counters);

		memcpy(per_cpu(cpu_msrs, cpu).controls,
		       per_cpu(cpu_msrs, 0).controls,
		       sizeof(struct op_msr) * model->num_controls);

		mux_clone(cpu);
	}

	nmi_enabled = 0;
	ctr_running = 0;
	barrier();
	err = register_die_notifier(&profile_exceptions_nb);
	if (err)
		goto fail;

	get_online_cpus();
	register_cpu_notifier(&oprofile_cpu_nb);
	on_each_cpu(nmi_cpu_setup, NULL, 1);
	nmi_enabled = 1;
	put_online_cpus();

	return 0;
fail:
	free_msrs();
	return err;
}

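/*
 * Teardown mirrors nmi_setup() in reverse: CPUs are shut down and the
 * hotplug notifier removed under get_online_cpus(), the enable flags
 * are cleared before the die notifier is unregistered, and only then
 * are the per-CPU MSR arrays freed.
 */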
static void nmi_shutdown(void)
{
	struct op_msrs *msrs;

	get_online_cpus();
	unregister_cpu_notifier(&oprofile_cpu_nb);
	on_each_cpu(nmi_cpu_shutdown, NULL, 1);
	nmi_enabled = 0;
	ctr_running = 0;
	put_online_cpus();
	barrier();
	unregister_die_notifier(&profile_exceptions_nb);
	msrs = &get_cpu_var(cpu_msrs);
	model->shutdown(msrs);
	free_msrs();
	put_cpu_var(cpu_msrs);
}

#ifdef CONFIG_PM

static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* Only one CPU left, just stop that one */
	if (nmi_enabled == 1)
		nmi_cpu_stop(NULL);
	return 0;
}

static int nmi_resume(struct sys_device *dev)
{
	if (nmi_enabled == 1)
		nmi_cpu_start(NULL);
	return 0;
}

static struct sysdev_class oprofile_sysclass = {
	.name = "oprofile",
	.resume = nmi_resume,
	.suspend = nmi_suspend,
};

static struct sys_device device_oprofile = {
	.id = 0,
	.cls = &oprofile_sysclass,
};

static int __init init_sysfs(void)
{
	int error;

	error = sysdev_class_register(&oprofile_sysclass);
	if (error)
		return error;

	error = sysdev_register(&device_oprofile);
	if (error)
		sysdev_class_unregister(&oprofile_sysclass);

	return error;
}

static void exit_sysfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}

#else

static inline int init_sysfs(void) { return 0; }
static inline void exit_sysfs(void) { }

#endif /* CONFIG_PM */

static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	if (cpu_model > 6 || cpu_model == 5)
		return 0;

#ifndef CONFIG_SMP
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#else
	switch (smp_num_siblings) {
	case 1:
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;

	case 2:
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}
#endif

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
}

static int force_arch_perfmon;

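/*
 * "arch_perfmon" is the only value handled here; e.g. booting with
 * oprofile.cpu_type=arch_perfmon (or loading the oprofile module
 * with cpu_type=arch_perfmon) forces the architectural perfmon
 * driver even when a model-specific one is available.  Any other
 * string is silently ignored.
 */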
static int force_cpu_type(const char *str, struct kernel_param *kp)
{
	if (!strcmp(str, "arch_perfmon")) {
		force_arch_perfmon = 1;
		printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
	}

	return 0;
}
module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);

static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	struct op_x86_model_spec *spec = &op_ppro_spec;	/* default */

	if (force_arch_perfmon && cpu_has_arch_perfmon)
		return 0;

	/*
	 * Documentation on identifying Intel processors by CPU family
	 * and model can be found in the Intel Software Developer's
	 * Manuals (SDM):
	 *
	 *  http://www.intel.com/products/processor/manuals/
	 *
	 * As of May 2010 the documentation for this was in the:
	 * "Intel 64 and IA-32 Architectures Software Developer's
	 * Manual Volume 3B: System Programming Guide", "Table B-1
	 * CPUID Signature Values of DisplayFamily_DisplayModel".
	 */
	switch (cpu_model) {
	case 0 ... 2:
		*cpu_type = "i386/ppro";
		break;
	case 3 ... 5:
		*cpu_type = "i386/pii";
		break;
	case 6 ... 8:
	case 10 ... 11:
		*cpu_type = "i386/piii";
		break;
	case 9:
	case 13:
		*cpu_type = "i386/p6_mobile";
		break;
	case 14:
		*cpu_type = "i386/core";
		break;
	case 0x0f:
	case 0x16:
	case 0x17:
	case 0x1d:
		*cpu_type = "i386/core_2";
		break;
	case 0x1a:
	case 0x1e:
	case 0x2e:
		spec = &op_arch_perfmon_spec;
		*cpu_type = "i386/core_i7";
		break;
	case 0x1c:
		*cpu_type = "i386/atom";
		break;
	default:
		/* Unknown */
		return 0;
	}

	model = spec;
	return 1;
}

/* in order to get sysfs right */
static int using_nmi;

int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type = NULL;
	int ret = 0;

	using_nmi = 0;

	if (!cpu_has_apic)
		return -ENODEV;

	switch (vendor) {
	case X86_VENDOR_AMD:
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */

		switch (family) {
		case 6:
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			/*
			 * Actually it could be i386/hammer too, but
			 * give user space a consistent name.
			 */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			cpu_type = "x86-64/family10";
			break;
		case 0x11:
			cpu_type = "x86-64/family11h";
			break;
		default:
			return -ENODEV;
		}
		model = &op_amd_spec;
		break;

	case X86_VENDOR_INTEL:
		switch (family) {
		/* Pentium IV */
		case 0xf:
			p4_init(&cpu_type);
			break;

		/* A P6-class processor */
		case 6:
			ppro_init(&cpu_type);
			break;

		default:
			break;
		}

		if (cpu_type)
			break;

		if (!cpu_has_arch_perfmon)
			return -ENODEV;

		/* use arch perfmon as fallback */
		cpu_type = "i386/arch_perfmon";
		model = &op_arch_perfmon_spec;
		break;

	default:
		return -ENODEV;
	}

	/* default values, can be overwritten by model */
	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

	if (model->init)
		ret = model->init(ops);
	if (ret)
		return ret;

	if (!model->num_virt_counters)
		model->num_virt_counters = model->num_counters;

	mux_init(ops);

	ret = init_sysfs();
	if (ret)
		return ret;

	using_nmi = 1;
	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}

void op_nmi_exit(void)
{
	if (using_nmi)
		exit_sysfs();
}