1/** 2 * @file nmi_int.c 3 * 4 * @remark Copyright 2002 OProfile authors 5 * @remark Read the file COPYING 6 * 7 * @author John Levon <levon@movementarian.org> 8 */ 9 10#include <linux/init.h> 11#include <linux/notifier.h> 12#include <linux/smp.h> 13#include <linux/oprofile.h> 14#include <linux/sysdev.h> 15#include <linux/slab.h> 16#include <linux/moduleparam.h> 17#include <linux/kdebug.h> 18#include <asm/nmi.h> 19#include <asm/msr.h> 20#include <asm/apic.h> 21 22#include "op_counter.h" 23#include "op_x86_model.h" 24 25static struct op_x86_model_spec const * model; 26static struct op_msrs cpu_msrs[NR_CPUS]; 27static unsigned long saved_lvtpc[NR_CPUS]; 28 29static int nmi_start(void); 30static void nmi_stop(void); 31 32/* 0 == registered but off, 1 == registered and on */ 33static int nmi_enabled = 0; 34 35#ifdef CONFIG_PM 36 37static int nmi_suspend(struct sys_device *dev, pm_message_t state) 38{ 39 if (nmi_enabled == 1) 40 nmi_stop(); 41 return 0; 42} 43 44 45static int nmi_resume(struct sys_device *dev) 46{ 47 if (nmi_enabled == 1) 48 nmi_start(); 49 return 0; 50} 51 52 53static struct sysdev_class oprofile_sysclass = { 54 set_kset_name("oprofile"), 55 .resume = nmi_resume, 56 .suspend = nmi_suspend, 57}; 58 59 60static struct sys_device device_oprofile = { 61 .id = 0, 62 .cls = &oprofile_sysclass, 63}; 64 65 66static int __init init_sysfs(void) 67{ 68 int error; 69 if (!(error = sysdev_class_register(&oprofile_sysclass))) 70 error = sysdev_register(&device_oprofile); 71 return error; 72} 73 74 75static void exit_sysfs(void) 76{ 77 sysdev_unregister(&device_oprofile); 78 sysdev_class_unregister(&oprofile_sysclass); 79} 80 81#else 82#define init_sysfs() do { } while (0) 83#define exit_sysfs() do { } while (0) 84#endif /* CONFIG_PM */ 85 86static int profile_exceptions_notify(struct notifier_block *self, 87 unsigned long val, void *data) 88{ 89 struct die_args *args = (struct die_args *)data; 90 int ret = NOTIFY_DONE; 91 int cpu = smp_processor_id(); 92 93 switch(val) { 94 case DIE_NMI: 95 if (model->check_ctrs(args->regs, &cpu_msrs[cpu])) 96 ret = NOTIFY_STOP; 97 break; 98 default: 99 break; 100 } 101 return ret; 102} 103 104static void nmi_cpu_save_registers(struct op_msrs * msrs) 105{ 106 unsigned int const nr_ctrs = model->num_counters; 107 unsigned int const nr_ctrls = model->num_controls; 108 struct op_msr * counters = msrs->counters; 109 struct op_msr * controls = msrs->controls; 110 unsigned int i; 111 112 for (i = 0; i < nr_ctrs; ++i) { 113 if (counters[i].addr){ 114 rdmsr(counters[i].addr, 115 counters[i].saved.low, 116 counters[i].saved.high); 117 } 118 } 119 120 for (i = 0; i < nr_ctrls; ++i) { 121 if (controls[i].addr){ 122 rdmsr(controls[i].addr, 123 controls[i].saved.low, 124 controls[i].saved.high); 125 } 126 } 127} 128 129 130static void nmi_save_registers(void * dummy) 131{ 132 int cpu = smp_processor_id(); 133 struct op_msrs * msrs = &cpu_msrs[cpu]; 134 nmi_cpu_save_registers(msrs); 135} 136 137 138static void free_msrs(void) 139{ 140 int i; 141 for_each_possible_cpu(i) { 142 kfree(cpu_msrs[i].counters); 143 cpu_msrs[i].counters = NULL; 144 kfree(cpu_msrs[i].controls); 145 cpu_msrs[i].controls = NULL; 146 } 147} 148 149 150static int allocate_msrs(void) 151{ 152 int success = 1; 153 size_t controls_size = sizeof(struct op_msr) * model->num_controls; 154 size_t counters_size = sizeof(struct op_msr) * model->num_counters; 155 156 int i; 157 for_each_possible_cpu(i) { 158 cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL); 159 if (!cpu_msrs[i].counters) { 160 success = 0; 161 break; 162 } 163 cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL); 164 if (!cpu_msrs[i].controls) { 165 success = 0; 166 break; 167 } 168 } 169 170 if (!success) 171 free_msrs(); 172 173 return success; 174} 175 176 177static void nmi_cpu_setup(void * dummy) 178{ 179 int cpu = smp_processor_id(); 180 struct op_msrs * msrs = &cpu_msrs[cpu]; 181 spin_lock(&oprofilefs_lock); 182 model->setup_ctrs(msrs); 183 spin_unlock(&oprofilefs_lock); 184 saved_lvtpc[cpu] = apic_read(APIC_LVTPC); 185 apic_write(APIC_LVTPC, APIC_DM_NMI); 186} 187 188static struct notifier_block profile_exceptions_nb = { 189 .notifier_call = profile_exceptions_notify, 190 .next = NULL, 191 .priority = 0 192}; 193 194static int nmi_setup(void) 195{ 196 int err=0; 197 int cpu; 198 199 if (!allocate_msrs()) 200 return -ENOMEM; 201 202 if ((err = register_die_notifier(&profile_exceptions_nb))){ 203 free_msrs(); 204 return err; 205 } 206 207 /* We need to serialize save and setup for HT because the subset 208 * of msrs are distinct for save and setup operations 209 */ 210 211 /* Assume saved/restored counters are the same on all CPUs */ 212 model->fill_in_addresses(&cpu_msrs[0]); 213 for_each_possible_cpu (cpu) { 214 if (cpu != 0) { 215 memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters, 216 sizeof(struct op_msr) * model->num_counters); 217 218 memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls, 219 sizeof(struct op_msr) * model->num_controls); 220 } 221 222 } 223 on_each_cpu(nmi_save_registers, NULL, 0, 1); 224 on_each_cpu(nmi_cpu_setup, NULL, 0, 1); 225 nmi_enabled = 1; 226 return 0; 227} 228 229 230static void nmi_restore_registers(struct op_msrs * msrs) 231{ 232 unsigned int const nr_ctrs = model->num_counters; 233 unsigned int const nr_ctrls = model->num_controls; 234 struct op_msr * counters = msrs->counters; 235 struct op_msr * controls = msrs->controls; 236 unsigned int i; 237 238 for (i = 0; i < nr_ctrls; ++i) { 239 if (controls[i].addr){ 240 wrmsr(controls[i].addr, 241 controls[i].saved.low, 242 controls[i].saved.high); 243 } 244 } 245 246 for (i = 0; i < nr_ctrs; ++i) { 247 if (counters[i].addr){ 248 wrmsr(counters[i].addr, 249 counters[i].saved.low, 250 counters[i].saved.high); 251 } 252 } 253} 254 255 256static void nmi_cpu_shutdown(void * dummy) 257{ 258 unsigned int v; 259 int cpu = smp_processor_id(); 260 struct op_msrs * msrs = &cpu_msrs[cpu]; 261 262 /* restoring APIC_LVTPC can trigger an apic error because the delivery 263 * mode and vector nr combination can be illegal. That's by design: on 264 * power on apic lvt contain a zero vector nr which are legal only for 265 * NMI delivery mode. So inhibit apic err before restoring lvtpc 266 */ 267 v = apic_read(APIC_LVTERR); 268 apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); 269 apic_write(APIC_LVTPC, saved_lvtpc[cpu]); 270 apic_write(APIC_LVTERR, v); 271 nmi_restore_registers(msrs); 272 model->shutdown(msrs); 273} 274 275 276static void nmi_shutdown(void) 277{ 278 nmi_enabled = 0; 279 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); 280 unregister_die_notifier(&profile_exceptions_nb); 281 free_msrs(); 282} 283 284 285static void nmi_cpu_start(void * dummy) 286{ 287 struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()]; 288 model->start(msrs); 289} 290 291 292static int nmi_start(void) 293{ 294 on_each_cpu(nmi_cpu_start, NULL, 0, 1); 295 return 0; 296} 297 298 299static void nmi_cpu_stop(void * dummy) 300{ 301 struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()]; 302 model->stop(msrs); 303} 304 305 306static void nmi_stop(void) 307{ 308 on_each_cpu(nmi_cpu_stop, NULL, 0, 1); 309} 310 311 312struct op_counter_config counter_config[OP_MAX_COUNTER]; 313 314static int nmi_create_files(struct super_block * sb, struct dentry * root) 315{ 316 unsigned int i; 317 318 for (i = 0; i < model->num_counters; ++i) { 319 struct dentry * dir; 320 char buf[4]; 321 322 /* quick little hack to _not_ expose a counter if it is not 323 * available for use. This should protect userspace app. 324 * NOTE: assumes 1:1 mapping here (that counters are organized 325 * sequentially in their struct assignment). 326 */ 327 if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) 328 continue; 329 330 snprintf(buf, sizeof(buf), "%d", i); 331 dir = oprofilefs_mkdir(sb, root, buf); 332 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 333 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); 334 oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); 335 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); 336 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); 337 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); 338 } 339 340 return 0; 341} 342 343static int p4force; 344module_param(p4force, int, 0); 345 346static int __init p4_init(char ** cpu_type) 347{ 348 __u8 cpu_model = boot_cpu_data.x86_model; 349 350 if (!p4force && (cpu_model > 6 || cpu_model == 5)) 351 return 0; 352 353#ifndef CONFIG_SMP 354 *cpu_type = "i386/p4"; 355 model = &op_p4_spec; 356 return 1; 357#else 358 switch (smp_num_siblings) { 359 case 1: 360 *cpu_type = "i386/p4"; 361 model = &op_p4_spec; 362 return 1; 363 364 case 2: 365 *cpu_type = "i386/p4-ht"; 366 model = &op_p4_ht2_spec; 367 return 1; 368 } 369#endif 370 371 printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n"); 372 printk(KERN_INFO "oprofile: Reverting to timer mode.\n"); 373 return 0; 374} 375 376 377static int __init ppro_init(char ** cpu_type) 378{ 379 __u8 cpu_model = boot_cpu_data.x86_model; 380 381 if (cpu_model == 14) 382 *cpu_type = "i386/core"; 383 else if (cpu_model == 15) 384 *cpu_type = "i386/core_2"; 385 else if (cpu_model > 0xd) 386 return 0; 387 else if (cpu_model == 9) { 388 *cpu_type = "i386/p6_mobile"; 389 } else if (cpu_model > 5) { 390 *cpu_type = "i386/piii"; 391 } else if (cpu_model > 2) { 392 *cpu_type = "i386/pii"; 393 } else { 394 *cpu_type = "i386/ppro"; 395 } 396 397 model = &op_ppro_spec; 398 return 1; 399} 400 401/* in order to get sysfs right */ 402static int using_nmi; 403 404int __init op_nmi_init(struct oprofile_operations *ops) 405{ 406 __u8 vendor = boot_cpu_data.x86_vendor; 407 __u8 family = boot_cpu_data.x86; 408 char *cpu_type; 409 410 if (!cpu_has_apic) 411 return -ENODEV; 412 413 switch (vendor) { 414 case X86_VENDOR_AMD: 415 /* Needs to be at least an Athlon (or hammer in 32bit mode) */ 416 417 switch (family) { 418 default: 419 return -ENODEV; 420 case 6: 421 model = &op_athlon_spec; 422 cpu_type = "i386/athlon"; 423 break; 424 case 0xf: 425 model = &op_athlon_spec; 426 /* Actually it could be i386/hammer too, but give 427 user space an consistent name. */ 428 cpu_type = "x86-64/hammer"; 429 break; 430 case 0x10: 431 model = &op_athlon_spec; 432 cpu_type = "x86-64/family10"; 433 break; 434 } 435 break; 436 437 case X86_VENDOR_INTEL: 438 switch (family) { 439 /* Pentium IV */ 440 case 0xf: 441 if (!p4_init(&cpu_type)) 442 return -ENODEV; 443 break; 444 445 /* A P6-class processor */ 446 case 6: 447 if (!ppro_init(&cpu_type)) 448 return -ENODEV; 449 break; 450 451 default: 452 return -ENODEV; 453 } 454 break; 455 456 default: 457 return -ENODEV; 458 } 459 460 init_sysfs(); 461 using_nmi = 1; 462 ops->create_files = nmi_create_files; 463 ops->setup = nmi_setup; 464 ops->shutdown = nmi_shutdown; 465 ops->start = nmi_start; 466 ops->stop = nmi_stop; 467 ops->cpu_type = cpu_type; 468 printk(KERN_INFO "oprofile: using NMI interrupt.\n"); 469 return 0; 470} 471 472 473void op_nmi_exit(void) 474{ 475 if (using_nmi) 476 exit_sysfs(); 477} 478