/*
 * This file implements the perfmon-2 subsystem which is used
 * to program the IA-64 Performance Monitoring Unit (PMU).
 *
 * The initial version of perfmon.c was written by
 * Ganesh Venkitachalam, IBM Corp.
 *
 * Then it was modified for perfmon-1.x by Stephane Eranian and
 * David Mosberger, Hewlett Packard Co.
 *
 * Version Perfmon-2.x is a rewrite of perfmon-1.x
 * by Stephane Eranian, Hewlett Packard Co.
 *
 * Copyright (C) 1999-2005  Hewlett Packard Co
 *               Stephane Eranian <eranian@hpl.hp.com>
 *               David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * More information about perfmon available at:
 * 	http://www.hpl.hp.com/research/linux/perfmon
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfs.h>
#include <linux/smp.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/rcupdate.h>
#include <linux/completion.h>
#include <linux/tracehook.h>
#include <linux/slab.h>

#include <asm/errno.h>
#include <asm/intrinsics.h>
#include <asm/page.h>
#include <asm/perfmon.h>
#include <asm/processor.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/delay.h>

#ifdef CONFIG_PERFMON
/*
 * perfmon context state
 */
#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */

#define PFM_INVALID_ACTIVATION	(~0UL)

#define PFM_NUM_PMC_REGS	64	/* PMC save area for ctxsw */
#define PFM_NUM_PMD_REGS	64	/* PMD save area for ctxsw */

/*
 * depth of message queue
 */
#define PFM_MAX_MSGS		32
#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
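/*
 * Note on the queue convention (summary, not original code): the message
 * queue is a classic circular buffer in which head == tail means empty,
 * and one slot is always left unused so that a full queue (tail + 1 ==
 * head, modulo PFM_MAX_MSGS) can be told apart from an empty one. The
 * queue therefore holds at most PFM_MAX_MSGS - 1 pending notifications;
 * see pfm_get_new_msg()/pfm_get_next_msg() further down.
 */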
/*
 * type of a PMU register (bitmask).
 * bitmask structure:
 * 	bit0   : register implemented
 * 	bit1   : end marker
 * 	bit2-3 : reserved
 * 	bit4   : pmc has pmc.pm
 * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
 * 	bit6-7 : register type
 * 	bit8-31: reserved
 */
#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
#define PFM_REG_IMPL		0x1 /* register implemented */
#define PFM_REG_END		0x2 /* end marker */
#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi + PMD used as a counter */
#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
#define PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
#define PFM_REG_BUFFER		(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
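/*
 * Note (summary, not original code): the register classes above are
 * cumulative bitmasks rather than plain enumerations, e.g.
 * PFM_REG_COUNTING contains all the bits of PFM_REG_MONITOR, which in
 * turn contains PFM_REG_IMPL. Class membership must therefore be tested
 * with mask-and-compare, as the macros below do:
 *
 *	(type & PFM_REG_COUNTING) == PFM_REG_COUNTING
 *
 * A plain (type & PFM_REG_COUNTING) != 0 test would also match any
 * register that merely shares one of the low bits.
 */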
#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)

#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)

/* i assumed unsigned */
#define PMC_IS_IMPL(i)	  (i < PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
#define PMD_IS_IMPL(i)	  (i < PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))

#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)

#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]

#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS

#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
#define PFM_CTX_TASK(h)		(h)->ctx_task

#define PMU_PMC_OI		5 /* position of pmc.oi bit */

#define CTX_USED_PMD(ctx, mask)		(ctx)->ctx_used_pmds[0] |= (mask)
#define CTX_IS_USED_PMD(ctx, c)		(((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)

#define CTX_USED_MONITOR(ctx, mask)	(ctx)->ctx_used_monitors[0] |= (mask)

#define CTX_USED_IBR(ctx,n)	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL << ((n) % 64)
#define CTX_USED_DBR(ctx,n)	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL << ((n) % 64)
#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg == 1)
#define PFM_CODE_RR	0	/* requesting code range restriction */
#define PFM_DATA_RR	1	/* requesting data range restriction */

#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)

#define RDEP(x)	(1UL<<(x))

/*
 * context protection macros
 * in SMP:
 * 	- we need to protect against CPU concurrency (spin_lock)
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 * in UP:
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 *
 * spin_lock_irqsave()/spin_unlock_irqrestore():
 * 	in SMP: local_irq_disable + spin_lock
 * 	in UP : local_irq_disable
 *
 * spin_lock()/spin_unlock():
 * 	in UP : removed automatically
 * 	in SMP: protect against context accesses from other CPU. interrupts
 * 	        are not masked. This is useful for the PMU interrupt handler
 * 	        because we know we will not get PMU concurrency in that code.
 */
#define PROTECT_CTX(c, f) \
	do { \
		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
		DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \
	} while(0)

#define UNPROTECT_CTX(c, f) \
	do { \
		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)

#define PROTECT_CTX_NOPRINT(c, f) \
	do { \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
	} while(0)


#define UNPROTECT_CTX_NOPRINT(c, f) \
	do { \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)


#define PROTECT_CTX_NOIRQ(c) \
	do { \
		spin_lock(&(c)->ctx_lock); \
	} while(0)

#define UNPROTECT_CTX_NOIRQ(c) \
	do { \
		spin_unlock(&(c)->ctx_lock); \
	} while(0)
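/*
 * Typical locking pattern (illustrative sketch, not original code): the
 * caller supplies the flags word that PROTECT_CTX() hands to
 * spin_lock_irqsave(), so a protected context section looks like:
 *
 *	unsigned long flags;
 *
 *	PROTECT_CTX(ctx, flags);
 *	... access or modify ctx state, PMU interrupts masked locally ...
 *	UNPROTECT_CTX(ctx, flags);
 *
 * The _NOIRQ variants are reserved for paths that already run with
 * interrupts disabled, such as context switch and the overflow handler.
 */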
#ifdef CONFIG_SMP

#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()

#else /* !CONFIG_SMP */
#define SET_ACTIVATION(t)	do {} while(0)
#define GET_ACTIVATION(t)	do {} while(0)
#define INC_ACTIVATION(t)	do {} while(0)
#endif /* CONFIG_SMP */

#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)

#define LOCK_PFS(g)		spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
#define UNLOCK_PFS(g)		spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)

#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)

/*
 * cmp0 must be the value of pmc0
 */
#define PMC0_HAS_OVFL(cmp0)	(cmp0 & ~0x1UL)

#define PFMFS_MAGIC 0xa0b4d889

/*
 * debugging
 */
#define PFM_DEBUGGING 1
#ifdef PFM_DEBUGGING
#define DPRINT(a) \
	do { \
		if (unlikely(pfm_sysctl.debug > 0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
	} while (0)

#define DPRINT_ovfl(a) \
	do { \
		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl > 0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
	} while (0)
#endif

/*
 * 64-bit software counter structure
 *
 * the next_reset_type is applied to the next call to pfm_reset_regs()
 */
typedef struct {
	unsigned long	val;		/* virtual 64bit counter value */
	unsigned long	lval;		/* last reset value */
	unsigned long	long_reset;	/* reset value on sampling overflow */
	unsigned long	short_reset;	/* reset value on overflow */
	unsigned long	reset_pmds[4];	/* which other pmds to reset when this counter overflows */
	unsigned long	smpl_pmds[4];	/* which pmds are accessed when counter overflows */
	unsigned long	seed;		/* seed for random-number generator */
	unsigned long	mask;		/* mask for random-number generator */
	unsigned int	flags;		/* notify/do not notify */
	unsigned long	eventid;	/* overflow event identifier */
} pfm_counter_t;
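/*
 * Note on counter virtualization (summary, not original code): hardware
 * PMDs implement fewer than 64 bits, so the full 64-bit count is split
 * between the hardware register (the low bits, covered by
 * pmu_conf->ovfl_val) and the 'val' field above (the high bits).
 * pfm_read_soft_counter() and pfm_write_soft_counter() further down
 * recombine and split the value:
 *
 *	full count = ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & ovfl_val)
 *
 * On overflow the low part is folded back into 'val' (see
 * pfm_mask_monitoring()), which is how a narrow hardware counter is
 * presented to user level as a 64-bit counter.
 */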
/*
 * context flags
 */
typedef struct {
	unsigned int block:1;		/* when 1, task will block on user notifications */
	unsigned int system:1;		/* do system wide monitoring */
	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
	unsigned int is_sampling:1;	/* true if using a custom format */
	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
	unsigned int no_msg:1;		/* no message sent on overflow */
	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
	unsigned int reserved:22;
} pfm_context_flags_t;

#define PFM_TRAP_REASON_NONE	0x0	/* default value */
#define PFM_TRAP_REASON_BLOCK	0x1	/* we need to block on overflow */
#define PFM_TRAP_REASON_RESET	0x2	/* we need to reset PMDs */


/*
 * perfmon context: encapsulates all the state of a monitoring session
 */

typedef struct pfm_context {
	spinlock_t		ctx_lock;		/* context protection */

	pfm_context_flags_t	ctx_flags;		/* bitmask of flags (block reason incl.) */
	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */

	struct task_struct	*ctx_task;		/* task to which context is attached */

	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */

	struct completion	ctx_restart_done;	/* used for blocking notification mode */

	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used */
	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */

	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */

	unsigned long		ctx_pmcs[PFM_NUM_PMC_REGS];	/* saved copies of PMC values */

	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */

	pfm_counter_t		ctx_pmds[PFM_NUM_PMD_REGS];	/* software state for PMDS */

	unsigned long		th_pmcs[PFM_NUM_PMC_REGS];	/* PMC thread save state */
	unsigned long		th_pmds[PFM_NUM_PMD_REGS];	/* PMD thread save state */

	unsigned long		ctx_saved_psr_up;	/* only contains psr.up value */

	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */

	int			ctx_fd;			/* file descriptor used by this context */
	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */

	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */

	wait_queue_head_t	ctx_msgq_wait;
	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
	int			ctx_msgq_head;
	int			ctx_msgq_tail;
	struct fasync_struct	*ctx_async_queue;

	wait_queue_head_t	ctx_zombieq;		/* termination cleanup wait queue */
} pfm_context_t;

/*
 * magic number used to verify that structure is really
 * a perfmon context
 */
#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)

#define PFM_GET_CTX(t)		((pfm_context_t *)(t)->thread.pfm_context)

#ifdef CONFIG_SMP
#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
#else
#define SET_LAST_CPU(ctx, v)	do {} while(0)
#define GET_LAST_CPU(ctx)	do {} while(0)
#endif
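/*
 * Note on ctx_last_activation/ctx_last_cpu (summary of the SMP lazy
 * restore scheme, not original code): each time a context is loaded onto
 * a CPU, the per-CPU pmu_activation_number is incremented and recorded
 * in ctx_last_activation. On a later context switch in, if both the
 * recorded CPU and the recorded activation number still match the
 * current CPU state, the live PMU registers are known to belong to this
 * context and do not have to be reloaded; this is the "speedup ctxsw in"
 * the field comments above refer to.
 */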
404 * 405 * If the probe function is defined, detection is based 406 * on its return value: 407 * - 0 means recognized PMU 408 * - anything else means not supported 409 * When the probe function is not defined, then the pmu_family field 410 * is used and it must match the host CPU family such that: 411 * - cpu->family & config->pmu_family != 0 412 */ 413typedef struct { 414 unsigned long ovfl_val; /* overflow value for counters */ 415 416 pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ 417 pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ 418 419 unsigned int num_pmcs; /* number of PMCS: computed at init time */ 420 unsigned int num_pmds; /* number of PMDS: computed at init time */ 421 unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ 422 unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ 423 424 char *pmu_name; /* PMU family name */ 425 unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ 426 unsigned int flags; /* pmu specific flags */ 427 unsigned int num_ibrs; /* number of IBRS: computed at init time */ 428 unsigned int num_dbrs; /* number of DBRS: computed at init time */ 429 unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ 430 int (*probe)(void); /* customized probe routine */ 431 unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ 432} pmu_config_t; 433/* 434 * PMU specific flags 435 */ 436#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ 437 438/* 439 * debug register related type definitions 440 */ 441typedef struct { 442 unsigned long ibr_mask:56; 443 unsigned long ibr_plm:4; 444 unsigned long ibr_ig:3; 445 unsigned long ibr_x:1; 446} ibr_mask_reg_t; 447 448typedef struct { 449 unsigned long dbr_mask:56; 450 unsigned long dbr_plm:4; 451 unsigned long dbr_ig:2; 452 unsigned long dbr_w:1; 453 unsigned long dbr_r:1; 454} dbr_mask_reg_t; 455 456typedef union { 457 unsigned long val; 458 ibr_mask_reg_t ibr; 459 dbr_mask_reg_t dbr; 460} dbreg_t; 461 462 463/* 464 * perfmon command descriptions 465 */ 466typedef struct { 467 int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 468 char *cmd_name; 469 int cmd_flags; 470 unsigned int cmd_narg; 471 size_t cmd_argsize; 472 int (*cmd_getsize)(void *arg, size_t *sz); 473} pfm_cmd_desc_t; 474 475#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ 476#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ 477#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ 478#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ 479 480 481#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name 482#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) 483#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) 484#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) 485#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) 486 487#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ 488 489typedef struct { 490 unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ 491 unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ 492 unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ 493 unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ 494 unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing 
/*
 * perfmon command descriptions
 */
typedef struct {
	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
	char		*cmd_name;
	int		cmd_flags;
	unsigned int	cmd_narg;
	size_t		cmd_argsize;
	int		(*cmd_getsize)(void *arg, size_t *sz);
} pfm_cmd_desc_t;

#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */


#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[(cmd)].cmd_name
#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)

#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */

typedef struct {
	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
	unsigned long pfm_ovfl_intr_count;		/* keep track of ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
	unsigned long pfm_smpl_handler_calls;
	unsigned long pfm_smpl_handler_cycles;
	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
} pfm_stats_t;

/*
 * perfmon internal variables
 */
static pfm_stats_t	pfm_stats[NR_CPUS];
static pfm_session_t	pfm_sessions;	/* global sessions information */

static DEFINE_SPINLOCK(pfm_alt_install_check);
static pfm_intr_handler_desc_t	*pfm_alt_intr_handler;

static struct proc_dir_entry	*perfmon_dir;
static pfm_uuid_t		pfm_null_uuid = {0,};

static spinlock_t		pfm_buffer_fmt_lock;
static LIST_HEAD(pfm_buffer_fmt_list);

static pmu_config_t		*pmu_conf;

/* sysctl() controls */
pfm_sysctl_t pfm_sysctl;
EXPORT_SYMBOL(pfm_sysctl);

static ctl_table pfm_ctl_table[] = {
	{
		.procname	= "debug",
		.data		= &pfm_sysctl.debug,
		.maxlen		= sizeof(int),
		.mode		= 0666,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "debug_ovfl",
		.data		= &pfm_sysctl.debug_ovfl,
		.maxlen		= sizeof(int),
		.mode		= 0666,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "fastctxsw",
		.data		= &pfm_sysctl.fastctxsw,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expert_mode",
		.data		= &pfm_sysctl.expert_mode,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec,
	},
	{}
};
static ctl_table pfm_sysctl_dir[] = {
	{
		.procname	= "perfmon",
		.mode		= 0555,
		.child		= pfm_ctl_table,
	},
	{}
};
static ctl_table pfm_sysctl_root[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= pfm_sysctl_dir,
	},
	{}
};
static struct ctl_table_header *pfm_sysctl_header;
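/*
 * Note (summary, not original code): once registered, the three-level
 * table above shows up as
 * /proc/sys/kernel/perfmon/{debug,debug_ovfl,fastctxsw,expert_mode}.
 * For example, overflow-path debugging can be toggled at run time with:
 *
 *	echo 1 > /proc/sys/kernel/perfmon/debug
 *	echo 1 > /proc/sys/kernel/perfmon/debug_ovfl
 *
 * which is what the DPRINT()/DPRINT_ovfl() macros test.
 */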
static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);

#define pfm_get_cpu_var(v)	__ia64_per_cpu_var(v)
#define pfm_get_cpu_data(a,b)	per_cpu(a, b)

static inline void
pfm_put_task(struct task_struct *task)
{
	if (task != current) put_task_struct(task);
}

static inline void
pfm_reserve_page(unsigned long a)
{
	SetPageReserved(vmalloc_to_page((void *)a));
}
static inline void
pfm_unreserve_page(unsigned long a)
{
	ClearPageReserved(vmalloc_to_page((void*)a));
}

static inline unsigned long
pfm_protect_ctx_ctxsw(pfm_context_t *x)
{
	spin_lock(&(x)->ctx_lock);
	return 0UL;
}

static inline void
pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
{
	spin_unlock(&(x)->ctx_lock);
}

static inline unsigned int
pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
{
	return do_munmap(mm, addr, len);
}

static inline unsigned long
pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
{
	return get_unmapped_area(file, addr, len, pgoff, flags);
}


static int
pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data,
	     struct vfsmount *mnt)
{
	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
}

static struct file_system_type pfm_fs_type = {
	.name		= "pfmfs",
	.get_sb		= pfmfs_get_sb,
	.kill_sb	= kill_anon_super,
};

DEFINE_PER_CPU(unsigned long, pfm_syst_info);
DEFINE_PER_CPU(struct task_struct *, pmu_owner);
DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
DEFINE_PER_CPU(unsigned long, pmu_activation_number);
EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);


/* forward declaration */
static const struct file_operations pfm_file_ops;

/*
 * forward declarations
 */
#ifndef CONFIG_SMP
static void pfm_lazy_save_regs (struct task_struct *ta);
#endif

void dump_pmu_state(const char *);
static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);

#include "perfmon_itanium.h"
#include "perfmon_mckinley.h"
#include "perfmon_montecito.h"
#include "perfmon_generic.h"

static pmu_config_t *pmu_confs[] = {
	&pmu_conf_mont,
	&pmu_conf_mck,
	&pmu_conf_ita,
	&pmu_conf_gen,	/* must be last */
	NULL
};
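/*
 * Note on detection order (summary, not original code): at init time the
 * entries above are tried in order and the first one that matches the
 * host, via its probe() routine or its pmu_family pattern, is selected,
 * roughly:
 *
 *	for (i = 0; pmu_confs[i]; i++) {
 *		if (pmu_confs[i]->probe && pmu_confs[i]->probe() == 0) {
 *			pmu_conf = pmu_confs[i];
 *			break;
 *		}
 *	}
 *
 * The generic configuration has no specific match and must therefore
 * stay last, acting as the catch-all fallback.
 */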
static int pfm_end_notify_user(pfm_context_t *ctx);

static inline void
pfm_clear_psr_pp(void)
{
	ia64_rsm(IA64_PSR_PP);
	ia64_srlz_i();
}

static inline void
pfm_set_psr_pp(void)
{
	ia64_ssm(IA64_PSR_PP);
	ia64_srlz_i();
}

static inline void
pfm_clear_psr_up(void)
{
	ia64_rsm(IA64_PSR_UP);
	ia64_srlz_i();
}

static inline void
pfm_set_psr_up(void)
{
	ia64_ssm(IA64_PSR_UP);
	ia64_srlz_i();
}

static inline unsigned long
pfm_get_psr(void)
{
	unsigned long tmp;
	tmp = ia64_getreg(_IA64_REG_PSR);
	ia64_srlz_i();
	return tmp;
}

static inline void
pfm_set_psr_l(unsigned long val)
{
	ia64_setreg(_IA64_REG_PSR_L, val);
	ia64_srlz_i();
}

static inline void
pfm_freeze_pmu(void)
{
	ia64_set_pmc(0,1UL);
	ia64_srlz_d();
}

static inline void
pfm_unfreeze_pmu(void)
{
	ia64_set_pmc(0,0UL);
	ia64_srlz_d();
}

static inline void
pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
{
	int i;

	for (i=0; i < nibrs; i++) {
		ia64_set_ibr(i, ibrs[i]);
		ia64_dv_serialize_instruction();
	}
	ia64_srlz_i();
}

static inline void
pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
{
	int i;

	for (i=0; i < ndbrs; i++) {
		ia64_set_dbr(i, dbrs[i]);
		ia64_dv_serialize_data();
	}
	ia64_srlz_d();
}

/*
 * PMD[i] must be a counter. no check is made
 */
static inline unsigned long
pfm_read_soft_counter(pfm_context_t *ctx, int i)
{
	return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
}

/*
 * PMD[i] must be a counter. no check is made
 */
static inline void
pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
{
	unsigned long ovfl_val = pmu_conf->ovfl_val;

	ctx->ctx_pmds[i].val = val & ~ovfl_val;
	/*
	 * writing to unimplemented part is ignored, so we do not need to
	 * mask off top part
	 */
	ia64_set_pmd(i, val & ovfl_val);
}

static pfm_msg_t *
pfm_get_new_msg(pfm_context_t *ctx)
{
	int idx, next;

	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;

	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
	if (next == ctx->ctx_msgq_head) return NULL;

	idx = ctx->ctx_msgq_tail;
	ctx->ctx_msgq_tail = next;

	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));

	return ctx->ctx_msgq+idx;
}

static pfm_msg_t *
pfm_get_next_msg(pfm_context_t *ctx)
{
	pfm_msg_t *msg;

	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));

	if (PFM_CTXQ_EMPTY(ctx)) return NULL;

	/*
	 * get oldest message
	 */
	msg = ctx->ctx_msgq+ctx->ctx_msgq_head;

	/*
	 * and move forward
	 */
	ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;

	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));

	return msg;
}

static void
pfm_reset_msgq(pfm_context_t *ctx)
{
	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
	DPRINT(("ctx=%p msgq reset\n", ctx));
}

static void *
pfm_rvmalloc(unsigned long size)
{
	void *mem;
	unsigned long addr;

	size = PAGE_ALIGN(size);
	mem  = vmalloc(size);
	if (mem) {
		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
		memset(mem, 0, size);
		addr = (unsigned long)mem;
		while (size > 0) {
			pfm_reserve_page(addr);
			addr += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	}
	return mem;
}

static void
pfm_rvfree(void *mem, unsigned long size)
{
	unsigned long addr;

	if (mem) {
		DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
		addr = (unsigned long) mem;
		while ((long) size > 0) {
			pfm_unreserve_page(addr);
			addr += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
		vfree(mem);
	}
	return;
}
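/*
 * Note (rationale, not original code): the sampling buffer is vmalloc'ed
 * but later mapped, page by page, into the monitoring process' address
 * space. SetPageReserved() in pfm_reserve_page() marks each page so the
 * VM subsystem treats it as pinned while it is user-visible;
 * pfm_rvfree() clears the reserved bit on every page before handing the
 * region back to vfree().
 */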
static pfm_context_t *
pfm_context_alloc(int ctx_flags)
{
	pfm_context_t *ctx;

	/*
	 * allocate context descriptor
	 * must be able to free with interrupts disabled
	 */
	ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
	if (ctx) {
		DPRINT(("alloc ctx @%p\n", ctx));

		/*
		 * init context protection lock
		 */
		spin_lock_init(&ctx->ctx_lock);

		/*
		 * context is unloaded
		 */
		ctx->ctx_state = PFM_CTX_UNLOADED;

		/*
		 * initialization of context's flags
		 */
		ctx->ctx_fl_block  = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
		ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1 : 0;
		ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1 : 0;
		/*
		 * will move to set properties
		 * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1 : 0;
		 */

		/*
		 * init restart semaphore to locked
		 */
		init_completion(&ctx->ctx_restart_done);

		/*
		 * activation is used in SMP only
		 */
		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
		SET_LAST_CPU(ctx, -1);

		/*
		 * initialize notification message queue
		 */
		ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
		init_waitqueue_head(&ctx->ctx_msgq_wait);
		init_waitqueue_head(&ctx->ctx_zombieq);

	}
	return ctx;
}

static void
pfm_context_free(pfm_context_t *ctx)
{
	if (ctx) {
		DPRINT(("free ctx @%p\n", ctx));
		kfree(ctx);
	}
}

static void
pfm_mask_monitoring(struct task_struct *task)
{
	pfm_context_t *ctx = PFM_GET_CTX(task);
	unsigned long mask, val, ovfl_mask;
	int i;

	DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));

	ovfl_mask = pmu_conf->ovfl_val;
	/*
	 * monitoring can only be masked as a result of a valid
	 * counter overflow. In UP, it means that the PMU still
	 * has an owner. Note that the owner can be different
	 * from the current task. However the PMU state belongs
	 * to the owner.
	 * In SMP, a valid overflow only happens when task is
	 * current. Therefore if we come here, we know that
	 * the PMU state belongs to the current task, therefore
	 * we can access the live registers.
	 *
	 * So in both cases, the live register contains the owner's
	 * state. We can ONLY touch the PMU registers and NOT the PSR.
	 *
	 * As a consequence to this call, the ctx->th_pmds[] array
	 * contains stale information which must be ignored
	 * when context is reloaded AND monitoring is active (see
	 * pfm_restart).
	 */
	mask = ctx->ctx_used_pmds[0];
	for (i = 0; mask; i++, mask>>=1) {
		/* skip non used pmds */
		if ((mask & 0x1) == 0) continue;
		val = ia64_get_pmd(i);

		if (PMD_IS_COUNTING(i)) {
			/*
			 * we rebuild the full 64 bit value of the counter
			 */
			ctx->ctx_pmds[i].val += (val & ovfl_mask);
		} else {
			ctx->ctx_pmds[i].val = val;
		}
		DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
			i,
			ctx->ctx_pmds[i].val,
			val & ovfl_mask));
	}
	/*
	 * mask monitoring by setting the privilege level to 0
	 * we cannot use psr.pp/psr.up for this, it is controlled by
	 * the user
	 *
	 * if task is current, modify actual registers, otherwise modify
	 * thread save state, i.e., what will be restored in pfm_load_regs()
	 */
	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0UL) continue;
		ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL);
		ctx->th_pmcs[i] &= ~0xfUL;
		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
	}
	/*
	 * make all of this visible
	 */
	ia64_srlz_d();
}
/*
 * must always be done with task == current
 *
 * context must be in MASKED state when calling
 */
static void
pfm_restore_monitoring(struct task_struct *task)
{
	pfm_context_t *ctx = PFM_GET_CTX(task);
	unsigned long mask, ovfl_mask;
	unsigned long psr, val;
	int i, is_system;

	is_system = ctx->ctx_fl_system;
	ovfl_mask = pmu_conf->ovfl_val;

	if (task != current) {
		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
		return;
	}
	if (ctx->ctx_state != PFM_CTX_MASKED) {
		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
			task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
		return;
	}
	psr = pfm_get_psr();
	/*
	 * monitoring is masked via the PMC.
	 * As we restore their value, we do not want each counter to
	 * restart right away. We stop monitoring using the PSR,
	 * restore the PMC (and PMD) and then re-establish the psr
	 * as it was. Note that there can be no pending overflow at
	 * this point, because monitoring was MASKED.
	 *
	 * system-wide sessions are pinned and self-monitoring
	 */
	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
		/* disable dcr pp */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
		pfm_clear_psr_pp();
	} else {
		pfm_clear_psr_up();
	}
	/*
	 * first, we restore the PMD
	 */
	mask = ctx->ctx_used_pmds[0];
	for (i = 0; mask; i++, mask>>=1) {
		/* skip non used pmds */
		if ((mask & 0x1) == 0) continue;

		if (PMD_IS_COUNTING(i)) {
			/*
			 * we split the 64bit value according to
			 * counter width
			 */
			val = ctx->ctx_pmds[i].val & ovfl_mask;
			ctx->ctx_pmds[i].val &= ~ovfl_mask;
		} else {
			val = ctx->ctx_pmds[i].val;
		}
		ia64_set_pmd(i, val);

		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
			i,
			ctx->ctx_pmds[i].val,
			val));
	}
	/*
	 * restore the PMCs
	 */
	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0UL) continue;
		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
		ia64_set_pmc(i, ctx->th_pmcs[i]);
		DPRINT(("[%d] pmc[%d]=0x%lx\n",
					task_pid_nr(task), i, ctx->th_pmcs[i]));
	}
	ia64_srlz_d();

	if (ctx->ctx_fl_using_dbreg) {
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
	}

	/*
	 * now restore PSR
	 */
	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
		/* enable dcr pp */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
		ia64_srlz_i();
	}
	pfm_set_psr_l(psr);
}

static inline void
pfm_save_pmds(unsigned long *pmds, unsigned long mask)
{
	int i;

	ia64_srlz_d();

	for (i=0; mask; i++, mask>>=1) {
		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
	}
}

/*
 * reload from thread state (used for ctxsw only)
 */
static inline void
pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
{
	int i;
	unsigned long val, ovfl_val = pmu_conf->ovfl_val;

	for (i=0; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0) continue;
		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
		ia64_set_pmd(i, val);
	}
	ia64_srlz_d();
}
/*
 * propagate PMD from context to thread-state
 */
static inline void
pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
{
	unsigned long ovfl_val = pmu_conf->ovfl_val;
	unsigned long mask = ctx->ctx_all_pmds[0];
	unsigned long val;
	int i;

	DPRINT(("mask=0x%lx\n", mask));

	for (i=0; mask; i++, mask>>=1) {

		val = ctx->ctx_pmds[i].val;

		/*
		 * We break up the 64 bit value into 2 pieces
		 * the lower bits go to the machine state in the
		 * thread (will be reloaded on ctxsw in).
		 * The upper part stays in the soft-counter.
		 */
		if (PMD_IS_COUNTING(i)) {
			ctx->ctx_pmds[i].val = val & ~ovfl_val;
			val &= ovfl_val;
		}
		ctx->th_pmds[i] = val;

		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
			i,
			ctx->th_pmds[i],
			ctx->ctx_pmds[i].val));
	}
}

/*
 * propagate PMC from context to thread-state
 */
static inline void
pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
{
	unsigned long mask = ctx->ctx_all_pmcs[0];
	int i;

	DPRINT(("mask=0x%lx\n", mask));

	for (i=0; mask; i++, mask>>=1) {
		/* masking 0 with ovfl_val yields 0 */
		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
	}
}



static inline void
pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
{
	int i;

	for (i=0; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0) continue;
		ia64_set_pmc(i, pmcs[i]);
	}
	ia64_srlz_d();
}

static inline int
pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
{
	return memcmp(a, b, sizeof(pfm_uuid_t));
}

static inline int
pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
	return ret;
}

static inline int
pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
{
	int ret = 0;
	if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
	return ret;
}


static inline int
pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
		     int cpu, void *arg)
{
	int ret = 0;
	if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
	return ret;
}

static inline int
pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
		 int cpu, void *arg)
{
	int ret = 0;
	if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
	return ret;
}

static inline int
pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
	return ret;
}

static inline int
pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
	return ret;
}

static pfm_buffer_fmt_t *
__pfm_find_buffer_fmt(pfm_uuid_t uuid)
{
	struct list_head * pos;
	pfm_buffer_fmt_t * entry;

	list_for_each(pos, &pfm_buffer_fmt_list) {
		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
		if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
			return entry;
	}
	return NULL;
}

/*
 * find a buffer format based on its uuid
 */
static pfm_buffer_fmt_t *
pfm_find_buffer_fmt(pfm_uuid_t uuid)
{
	pfm_buffer_fmt_t * fmt;
	spin_lock(&pfm_buffer_fmt_lock);
	fmt = __pfm_find_buffer_fmt(uuid);
	spin_unlock(&pfm_buffer_fmt_lock);
	return fmt;
}
int
pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
{
	int ret = 0;

	/* some sanity checks */
	if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;

	/* we need at least a handler */
	if (fmt->fmt_handler == NULL) return -EINVAL;


	spin_lock(&pfm_buffer_fmt_lock);

	if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
		ret = -EBUSY;
		goto out;
	}
	list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
	printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);

out:
	spin_unlock(&pfm_buffer_fmt_lock);
	return ret;
}
EXPORT_SYMBOL(pfm_register_buffer_fmt);

int
pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
{
	pfm_buffer_fmt_t *fmt;
	int ret = 0;

	spin_lock(&pfm_buffer_fmt_lock);

	fmt = __pfm_find_buffer_fmt(uuid);
	if (!fmt) {
		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
		ret = -EINVAL;
		goto out;
	}
	list_del_init(&fmt->fmt_list);
	printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);

out:
	spin_unlock(&pfm_buffer_fmt_lock);
	return ret;

}
EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
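/*
 * Minimal registration sketch (hypothetical module code, not part of
 * this file): a sampling-format module fills in a pfm_buffer_fmt_t and
 * registers it at load time. Only fmt_name, fmt_uuid and fmt_handler
 * are strictly required by the checks above; the other callbacks
 * (fmt_validate, fmt_init, fmt_restart, fmt_exit, ...) are optional.
 *
 *	static pfm_buffer_fmt_t my_fmt = {
 *		.fmt_name    = "my-sampling-format",
 *		.fmt_uuid    = MY_FMT_UUID,	// hypothetical UUID constant
 *		.fmt_handler = my_ovfl_handler,	// hypothetical handler
 *	};
 *
 *	ret = pfm_register_buffer_fmt(&my_fmt);
 *	...
 *	pfm_unregister_buffer_fmt(my_fmt.fmt_uuid);
 */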
extern void update_pal_halt_status(int);

static int
pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
{
	unsigned long flags;
	/*
	 * validity checks on cpu_mask have been done upstream
	 */
	LOCK_PFS(flags);

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	if (is_syswide) {
		/*
		 * cannot mix system wide and per-task sessions
		 */
		if (pfm_sessions.pfs_task_sessions > 0UL) {
			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
				pfm_sessions.pfs_task_sessions));
			goto abort;
		}

		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;

		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));

		pfm_sessions.pfs_sys_session[cpu] = task;

		pfm_sessions.pfs_sys_sessions++ ;

	} else {
		if (pfm_sessions.pfs_sys_sessions) goto abort;
		pfm_sessions.pfs_task_sessions++;
	}

	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	/*
	 * disable default_idle() to go to PAL_HALT
	 */
	update_pal_halt_status(0);

	UNLOCK_PFS(flags);

	return 0;

error_conflict:
	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
		task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
		cpu));
abort:
	UNLOCK_PFS(flags);

	return -EBUSY;

}

static int
pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
{
	unsigned long flags;
	/*
	 * validity checks on cpu_mask have been done upstream
	 */
	LOCK_PFS(flags);

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));


	if (is_syswide) {
		pfm_sessions.pfs_sys_session[cpu] = NULL;
		/*
		 * would not work with perfmon+more than one bit in cpu_mask
		 */
		if (ctx && ctx->ctx_fl_using_dbreg) {
			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
			} else {
				pfm_sessions.pfs_sys_use_dbregs--;
			}
		}
		pfm_sessions.pfs_sys_sessions--;
	} else {
		pfm_sessions.pfs_task_sessions--;
	}
	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	/*
	 * if possible, enable default_idle() to go into PAL_HALT
	 */
	if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
		update_pal_halt_status(1);

	UNLOCK_PFS(flags);

	return 0;
}
/*
 * removes virtual mapping of the sampling buffer.
 * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
 * a PROTECT_CTX() section.
 */
static int
pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
{
	int r;

	/* sanity checks */
	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
		return -EINVAL;
	}

	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));

	/*
	 * does the actual unmapping
	 */
	down_write(&task->mm->mmap_sem);

	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));

	r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);

	up_write(&task->mm->mmap_sem);
	if (r != 0) {
		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
	}

	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));

	return 0;
}

/*
 * free actual physical storage used by sampling buffer
 */

static inline void
pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
{
	if (fmt == NULL) return;

	pfm_buf_fmt_exit(fmt, current, NULL, NULL);

}

/*
 * pfmfs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pfm: will go nicely and kill the special-casing in procfs.
 */
static struct vfsmount *pfmfs_mnt;

static int __init
init_pfm_fs(void)
{
	int err = register_filesystem(&pfm_fs_type);
	if (!err) {
		pfmfs_mnt = kern_mount(&pfm_fs_type);
		err = PTR_ERR(pfmfs_mnt);
		if (IS_ERR(pfmfs_mnt))
			unregister_filesystem(&pfm_fs_type);
		else
			err = 0;
	}
	return err;
}

static ssize_t
pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
	pfm_context_t *ctx;
	pfm_msg_t *msg;
	ssize_t ret;
	unsigned long flags;
	DECLARE_WAITQUEUE(wait, current);
	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", task_pid_nr(current));
		return -EINVAL;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
		return -EINVAL;
	}

	/*
	 * check even when there is no message
	 */
	if (size < sizeof(pfm_msg_t)) {
		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
		return -EINVAL;
	}

	PROTECT_CTX(ctx, flags);

	/*
	 * put ourselves on the wait queue
	 */
	add_wait_queue(&ctx->ctx_msgq_wait, &wait);


	for(;;) {
		/*
		 * check wait queue
		 */

		set_current_state(TASK_INTERRUPTIBLE);

		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));

		ret = 0;
		if(PFM_CTXQ_EMPTY(ctx) == 0) break;

		UNPROTECT_CTX(ctx, flags);

		/*
		 * check non-blocking read
		 */
		ret = -EAGAIN;
		if(filp->f_flags & O_NONBLOCK) break;

		/*
		 * check pending signals
		 */
		if(signal_pending(current)) {
			ret = -EINTR;
			break;
		}
		/*
		 * no message, so wait
		 */
		schedule();

		PROTECT_CTX(ctx, flags);
	}
	DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);

	if (ret < 0) goto abort;

	ret = -EINVAL;
	msg = pfm_get_next_msg(ctx);
	if (msg == NULL) {
		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
		goto abort_locked;
	}

	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));

	ret = -EFAULT;
	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);

abort_locked:
	UNPROTECT_CTX(ctx, flags);
abort:
	return ret;
}

static ssize_t
pfm_write(struct file *file, const char __user *ubuf,
			  size_t size, loff_t *ppos)
{
	DPRINT(("pfm_write called\n"));
	return -EINVAL;
}
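/*
 * Note on read() semantics (summary, not original code): a read on a
 * perfmon file descriptor returns exactly one pfm_msg_t per call, and
 * the user buffer must be at least sizeof(pfm_msg_t) bytes. With
 * O_NONBLOCK the call fails with -EAGAIN when the queue is empty,
 * otherwise it sleeps interruptibly until a notification message is
 * queued. write() above and ioctl() below are deliberately not
 * supported on these descriptors.
 */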
static unsigned int
pfm_poll(struct file *filp, poll_table * wait)
{
	pfm_context_t *ctx;
	unsigned long flags;
	unsigned int mask = 0;

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
		return 0;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
		return 0;
	}


	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));

	poll_wait(filp, &ctx->ctx_msgq_wait, wait);

	PROTECT_CTX(ctx, flags);

	if (PFM_CTXQ_EMPTY(ctx) == 0)
		mask = POLLIN | POLLRDNORM;

	UNPROTECT_CTX(ctx, flags);

	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));

	return mask;
}

static long
pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	DPRINT(("pfm_ioctl called\n"));
	return -EINVAL;
}

/*
 * interrupt cannot be masked when coming here
 */
static inline int
pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
{
	int ret;

	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);

	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
		task_pid_nr(current),
		fd,
		on,
		ctx->ctx_async_queue, ret));

	return ret;
}

static int
pfm_fasync(int fd, struct file *filp, int on)
{
	pfm_context_t *ctx;
	int ret;

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
		return -EBADF;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
		return -EBADF;
	}
	/*
	 * we cannot mask interrupts during this call because this may
	 * go to sleep if memory is not readily available.
	 *
	 * We are protected from the context disappearing by the get_fd()/put_fd()
	 * done in caller. Serialization of this function is ensured by caller.
	 */
	ret = pfm_do_fasync(fd, filp, ctx, on);


	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
		fd,
		on,
		ctx->ctx_async_queue, ret));

	return ret;
}
#ifdef CONFIG_SMP
/*
 * this function is exclusively called from pfm_close().
 * The context is not protected at that time, nor are interrupts
 * on the remote CPU. That's necessary to avoid deadlocks.
 */
static void
pfm_syswide_force_stop(void *info)
{
	pfm_context_t	*ctx = (pfm_context_t *)info;
	struct pt_regs *regs = task_pt_regs(current);
	struct task_struct *owner;
	unsigned long flags;
	int ret;

	if (ctx->ctx_cpu != smp_processor_id()) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n",
			ctx->ctx_cpu,
			smp_processor_id());
		return;
	}
	owner = GET_PMU_OWNER();
	if (owner != ctx->ctx_task) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
			smp_processor_id(),
			task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
		return;
	}
	if (GET_PMU_CTX() != ctx) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
			smp_processor_id(),
			GET_PMU_CTX(), ctx);
		return;
	}

	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
	/*
	 * the context is already protected in pfm_close(), we simply
	 * need to mask interrupts to avoid a PMU interrupt race on
	 * this CPU
	 */
	local_irq_save(flags);

	ret = pfm_context_unload(ctx, NULL, 0, regs);
	if (ret) {
		DPRINT(("context_unload returned %d\n", ret));
	}

	/*
	 * unmask interrupts, PMU interrupts are now spurious here
	 */
	local_irq_restore(flags);
}

static void
pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
{
	int ret;

	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1);
	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
}
#endif /* CONFIG_SMP */
/*
 * called for each close(). Partially free resources.
 * When caller is self-monitoring, the context is unloaded.
 */
static int
pfm_flush(struct file *filp, fl_owner_t id)
{
	pfm_context_t *ctx;
	struct task_struct *task;
	struct pt_regs *regs;
	unsigned long flags;
	unsigned long smpl_buf_size = 0UL;
	void *smpl_buf_vaddr = NULL;
	int state, is_system;

	if (PFM_IS_FILE(filp) == 0) {
		DPRINT(("bad magic\n"));
		return -EBADF;
	}

	ctx = (pfm_context_t *)filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
		return -EBADF;
	}

	/*
	 * remove our file from the async queue, if we use this mode.
	 * This can be done without the context being protected. We come
	 * here when the context has become unreachable by other tasks.
	 *
	 * We may still have active monitoring at this point and we may
	 * end up in pfm_overflow_handler(). However, fasync_helper()
	 * operates with interrupts disabled and it cleans up the
	 * queue. If the PMU handler is called prior to entering
	 * fasync_helper() then it will send a signal. If it is
	 * invoked after, it will find an empty queue and no
	 * signal will be sent. In both cases, we are safe
	 */
	PROTECT_CTX(ctx, flags);

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	task = PFM_CTX_TASK(ctx);
	regs = task_pt_regs(task);

	DPRINT(("ctx_state=%d is_current=%d\n",
		state,
		task == current ? 1 : 0));

	/*
	 * if state == UNLOADED, then task is NULL
	 */

	/*
	 * we must stop and unload because we are losing access to the context.
	 */
	if (task == current) {
#ifdef CONFIG_SMP
		/*
		 * the task IS the owner but it migrated to another CPU: that's bad
		 * but we must handle this cleanly. Unfortunately, the kernel does
		 * not provide a mechanism to block migration (while the context is loaded).
		 *
		 * We need to release the resource on the ORIGINAL cpu.
		 */
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {

			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
			/*
			 * keep context protected but unmask interrupt for IPI
			 */
			local_irq_restore(flags);

			pfm_syswide_cleanup_other_cpu(ctx);

			/*
			 * restore interrupt masking
			 */
			local_irq_save(flags);

			/*
			 * context is unloaded at this point
			 */
		} else
#endif /* CONFIG_SMP */
		{

			DPRINT(("forcing unload\n"));
			/*
			 * stop and unload, returning with state UNLOADED
			 * and session unreserved.
			 */
			pfm_context_unload(ctx, NULL, 0, regs);

			DPRINT(("ctx_state=%d\n", ctx->ctx_state));
		}
	}

	/*
	 * remove virtual mapping, if any, for the calling task.
	 * cannot reset ctx field until last user is calling close().
	 *
	 * ctx_smpl_vaddr must never be cleared because it is needed
	 * by every task with access to the context
	 *
	 * When called from do_exit(), the mm context is gone already, therefore
	 * mm is NULL, i.e., the VMA is already gone and we do not have to
	 * do anything here
	 */
	if (ctx->ctx_smpl_vaddr && current->mm) {
		smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
		smpl_buf_size  = ctx->ctx_smpl_size;
	}

	UNPROTECT_CTX(ctx, flags);

	/*
	 * if there was a mapping, then we systematically remove it
	 * at this point. Cannot be done inside critical section
	 * because some VM function reenables interrupts.
	 *
	 */
	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);

	return 0;
}
1922 */ 1923static int 1924pfm_close(struct inode *inode, struct file *filp) 1925{ 1926 pfm_context_t *ctx; 1927 struct task_struct *task; 1928 struct pt_regs *regs; 1929 DECLARE_WAITQUEUE(wait, current); 1930 unsigned long flags; 1931 unsigned long smpl_buf_size = 0UL; 1932 void *smpl_buf_addr = NULL; 1933 int free_possible = 1; 1934 int state, is_system; 1935 1936 DPRINT(("pfm_close called private=%p\n", filp->private_data)); 1937 1938 if (PFM_IS_FILE(filp) == 0) { 1939 DPRINT(("bad magic\n")); 1940 return -EBADF; 1941 } 1942 1943 ctx = (pfm_context_t *)filp->private_data; 1944 if (ctx == NULL) { 1945 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current)); 1946 return -EBADF; 1947 } 1948 1949 PROTECT_CTX(ctx, flags); 1950 1951 state = ctx->ctx_state; 1952 is_system = ctx->ctx_fl_system; 1953 1954 task = PFM_CTX_TASK(ctx); 1955 regs = task_pt_regs(task); 1956 1957 DPRINT(("ctx_state=%d is_current=%d\n", 1958 state, 1959 task == current ? 1 : 0)); 1960 1961 /* 1962 * if task == current, then pfm_flush() unloaded the context 1963 */ 1964 if (state == PFM_CTX_UNLOADED) goto doit; 1965 1966 /* 1967 * context is loaded/masked and task != current, we need to 1968 * either force an unload or go zombie 1969 */ 1970 1971 /* 1972 * The task is currently blocked or will block after an overflow. 1973 * we must force it to wakeup to get out of the 1974 * MASKED state and transition to the unloaded state by itself. 1975 * 1976 * This situation is only possible for per-task mode 1977 */ 1978 if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { 1979 1980 /* 1981 * set a "partial" zombie state to be checked 1982 * upon return from down() in pfm_handle_work(). 1983 * 1984 * We cannot use the ZOMBIE state, because it is checked 1985 * by pfm_load_regs() which is called upon wakeup from down(). 1986 * In such case, it would free the context and then we would 1987 * return to pfm_handle_work() which would access the 1988 * stale context. Instead, we set a flag invisible to pfm_load_regs() 1989 * but visible to pfm_handle_work(). 1990 * 1991 * For some window of time, we have a zombie context with 1992 * ctx_state = MASKED and not ZOMBIE 1993 */ 1994 ctx->ctx_fl_going_zombie = 1; 1995 1996 /* 1997 * force task to wake up from MASKED state 1998 */ 1999 complete(&ctx->ctx_restart_done); 2000 2001 DPRINT(("waking up ctx_state=%d\n", state)); 2002 2003 /* 2004 * put ourself to sleep waiting for the other 2005 * task to report completion 2006 * 2007 * the context is protected by mutex, therefore there 2008 * is no risk of being notified of completion before 2009 * begin actually on the waitq. 2010 */ 2011 set_current_state(TASK_INTERRUPTIBLE); 2012 add_wait_queue(&ctx->ctx_zombieq, &wait); 2013 2014 UNPROTECT_CTX(ctx, flags); 2015 2016 schedule(); 2017 2018 2019 PROTECT_CTX(ctx, flags); 2020 2021 2022 remove_wait_queue(&ctx->ctx_zombieq, &wait); 2023 set_current_state(TASK_RUNNING); 2024 2025 /* 2026 * context is unloaded at this point 2027 */ 2028 DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); 2029 } 2030 else if (task != current) { 2031#ifdef CONFIG_SMP 2032 /* 2033 * switch context to zombie state 2034 */ 2035 ctx->ctx_state = PFM_CTX_ZOMBIE; 2036 2037 DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task))); 2038 /* 2039 * cannot free the context on the spot. 
deferred until
2040 * the task notices the ZOMBIE state
2041 */
2042 free_possible = 0;
2043#else
2044 pfm_context_unload(ctx, NULL, 0, regs);
2045#endif
2046 }
2047
2048doit:
2049 /* reload state, may have changed during opening of critical section */
2050 state = ctx->ctx_state;
2051
2052 /*
2053 * the context is still attached to a task (possibly current)
2054 * we cannot destroy it right now
2055 */
2056
2057 /*
2058 * we must free the sampling buffer right here because
2059 * we cannot rely on it being cleaned up later by the
2060 * monitored task. It is not possible to free vmalloc'ed
2061 * memory in pfm_load_regs(). Instead, we remove the buffer
2062 * now. Should there be a subsequent PMU overflow originally
2063 * meant for sampling, it will be converted to spurious
2064 * and that's fine because the monitoring tool is gone anyway.
2065 */
2066 if (ctx->ctx_smpl_hdr) {
2067 smpl_buf_addr = ctx->ctx_smpl_hdr;
2068 smpl_buf_size = ctx->ctx_smpl_size;
2069 /* no more sampling */
2070 ctx->ctx_smpl_hdr = NULL;
2071 ctx->ctx_fl_is_sampling = 0;
2072 }
2073
2074 DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
2075 state,
2076 free_possible,
2077 smpl_buf_addr,
2078 smpl_buf_size));
2079
2080 if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);
2081
2082 /*
2083 * when UNLOADED, the session has already been unreserved; only a ZOMBIE context still holds its session here
2084 */
2085 if (state == PFM_CTX_ZOMBIE) {
2086 pfm_unreserve_session(ctx, ctx->ctx_fl_system, ctx->ctx_cpu);
2087 }
2088
2089 /*
2090 * disconnecting the file descriptor from the context must be done
2091 * before we unlock.
2092 */
2093 filp->private_data = NULL;
2094
2095 /*
2096 * if we free on the spot, the context is now completely unreachable
2097 * from the caller's side. The monitored task side is also cut, so we
2098 * can free it safely.
2099 *
2100 * If we have a deferred free, only the caller side is disconnected.
2101 */
2102 UNPROTECT_CTX(ctx, flags);
2103
2104 /*
2105 * All memory free operations (especially for vmalloc'ed memory)
2106 * MUST be done with interrupts ENABLED.
2107 */
2108 if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size);
2109
2110 /*
2111 * return the memory used by the context
2112 */
2113 if (free_possible) pfm_context_free(ctx);
2114
2115 return 0;
2116}
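/*
 * Illustrative user-level sketch (added commentary, not part of the
 * original file): the flush/close path above is what a monitoring tool
 * exercises when it destroys a context. Assuming the perfmonctl(2)
 * calling convention used by this interface (the fd argument is ignored
 * by PFM_CREATE_CONTEXT, which returns the new descriptor in ctx_fd),
 * with all error handling omitted:
 *
 *	pfarg_context_t req;
 *
 *	memset(&req, 0, sizeof(req));	// no sampling buffer: null UUID
 *	perfmonctl(0, PFM_CREATE_CONTEXT, &req, 1);
 *	...
 *	close(req.ctx_fd);	// last close(): pfm_flush(), then pfm_close()
 */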
2117
2118static int
2119pfm_no_open(struct inode *irrelevant, struct file *dontcare)
2120{
2121 DPRINT(("pfm_no_open called\n"));
2122 return -ENXIO;
2123}
2124
2125
2126
2127static const struct file_operations pfm_file_ops = {
2128 .llseek = no_llseek,
2129 .read = pfm_read,
2130 .write = pfm_write,
2131 .poll = pfm_poll,
2132 .unlocked_ioctl = pfm_ioctl,
2133 .open = pfm_no_open, /* special open code to disallow open via /proc */
2134 .fasync = pfm_fasync,
2135 .release = pfm_close,
2136 .flush = pfm_flush
2137};
2138
2139static int
2140pfmfs_delete_dentry(struct dentry *dentry)
2141{
2142 return 1;
2143}
2144
2145static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
2146{
2147 return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
2148 dentry->d_inode->i_ino);
2149}
2150
2151static const struct dentry_operations pfmfs_dentry_operations = {
2152 .d_delete = pfmfs_delete_dentry,
2153 .d_dname = pfmfs_dname,
2154};
2155
2156
2157static struct file *
2158pfm_alloc_file(pfm_context_t *ctx)
2159{
2160 struct file *file;
2161 struct inode *inode;
2162 struct path path;
2163 struct qstr this = { .name = "" };
2164
2165 /*
2166 * allocate a new inode
2167 */
2168 inode = new_inode(pfmfs_mnt->mnt_sb);
2169 if (!inode)
2170 return ERR_PTR(-ENOMEM);
2171
2172 DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
2173
2174 inode->i_mode = S_IFCHR|S_IRUGO;
2175 inode->i_uid = current_fsuid();
2176 inode->i_gid = current_fsgid();
2177
2178 /*
2179 * allocate a new dcache entry
2180 */
2181 path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
2182 if (!path.dentry) {
2183 iput(inode);
2184 return ERR_PTR(-ENOMEM);
2185 }
2186 path.mnt = mntget(pfmfs_mnt);
2187
2188 path.dentry->d_op = &pfmfs_dentry_operations;
2189 d_add(path.dentry, inode);
2190
2191 file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
2192 if (!file) {
2193 path_put(&path);
2194 return ERR_PTR(-ENFILE);
2195 }
2196
2197 file->f_flags = O_RDONLY;
2198 file->private_data = ctx;
2199
2200 return file;
2201}
2202
2203static int
2204pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
2205{
2206 DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
2207
2208 while (size > 0) {
2209 unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT;
2210
2211
2212 if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY))
2213 return -ENOMEM;
2214
2215 addr += PAGE_SIZE;
2216 buf += PAGE_SIZE;
2217 size -= PAGE_SIZE;
2218 }
2219 return 0;
2220}
2221
2222/*
2223 * allocates a sampling buffer and remaps it into the user address space of the task
2224 */
2225static int
2226pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
2227{
2228 struct mm_struct *mm = task->mm;
2229 struct vm_area_struct *vma = NULL;
2230 unsigned long size;
2231 void *smpl_buf;
2232
2233
2234 /*
2235 * the fixed header + requested size, aligned to a page boundary
2236 */
2237 size = PAGE_ALIGN(rsize);
2238
2239 DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));
2240
2241 if (size > task_rlimit(task, RLIMIT_MEMLOCK))
2242 return -ENOMEM;
2243
2244 /*
2245 * We do the easy to undo allocations first.
2246 * 2247 * pfm_rvmalloc(), clears the buffer, so there is no leak 2248 */ 2249 smpl_buf = pfm_rvmalloc(size); 2250 if (smpl_buf == NULL) { 2251 DPRINT(("Can't allocate sampling buffer\n")); 2252 return -ENOMEM; 2253 } 2254 2255 DPRINT(("smpl_buf @%p\n", smpl_buf)); 2256 2257 /* allocate vma */ 2258 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 2259 if (!vma) { 2260 DPRINT(("Cannot allocate vma\n")); 2261 goto error_kmem; 2262 } 2263 INIT_LIST_HEAD(&vma->anon_vma_chain); 2264 2265 /* 2266 * partially initialize the vma for the sampling buffer 2267 */ 2268 vma->vm_mm = mm; 2269 vma->vm_file = filp; 2270 vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; 2271 vma->vm_page_prot = PAGE_READONLY; 2272 2273 /* 2274 * Now we have everything we need and we can initialize 2275 * and connect all the data structures 2276 */ 2277 2278 ctx->ctx_smpl_hdr = smpl_buf; 2279 ctx->ctx_smpl_size = size; /* aligned size */ 2280 2281 /* 2282 * Let's do the difficult operations next. 2283 * 2284 * now we atomically find some area in the address space and 2285 * remap the buffer in it. 2286 */ 2287 down_write(&task->mm->mmap_sem); 2288 2289 /* find some free area in address space, must have mmap sem held */ 2290 vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); 2291 if (vma->vm_start == 0UL) { 2292 DPRINT(("Cannot find unmapped area for size %ld\n", size)); 2293 up_write(&task->mm->mmap_sem); 2294 goto error; 2295 } 2296 vma->vm_end = vma->vm_start + size; 2297 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; 2298 2299 DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); 2300 2301 /* can only be applied to current task, need to have the mm semaphore held when called */ 2302 if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { 2303 DPRINT(("Can't remap buffer\n")); 2304 up_write(&task->mm->mmap_sem); 2305 goto error; 2306 } 2307 2308 get_file(filp); 2309 2310 /* 2311 * now insert the vma in the vm list for the process, must be 2312 * done with mmap lock held 2313 */ 2314 insert_vm_struct(mm, vma); 2315 2316 mm->total_vm += size >> PAGE_SHIFT; 2317 vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, 2318 vma_pages(vma)); 2319 up_write(&task->mm->mmap_sem); 2320 2321 /* 2322 * keep track of user level virtual address 2323 */ 2324 ctx->ctx_smpl_vaddr = (void *)vma->vm_start; 2325 *(unsigned long *)user_vaddr = vma->vm_start; 2326 2327 return 0; 2328 2329error: 2330 kmem_cache_free(vm_area_cachep, vma); 2331error_kmem: 2332 pfm_rvfree(smpl_buf, size); 2333 2334 return -ENOMEM; 2335} 2336 2337static int 2338pfm_bad_permissions(struct task_struct *task) 2339{ 2340 const struct cred *tcred; 2341 uid_t uid = current_uid(); 2342 gid_t gid = current_gid(); 2343 int ret; 2344 2345 rcu_read_lock(); 2346 tcred = __task_cred(task); 2347 2348 /* inspired by ptrace_attach() */ 2349 DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", 2350 uid, 2351 gid, 2352 tcred->euid, 2353 tcred->suid, 2354 tcred->uid, 2355 tcred->egid, 2356 tcred->sgid)); 2357 2358 ret = ((uid != tcred->euid) 2359 || (uid != tcred->suid) 2360 || (uid != tcred->uid) 2361 || (gid != tcred->egid) 2362 || (gid != tcred->sgid) 2363 || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE); 2364 2365 rcu_read_unlock(); 2366 return ret; 2367} 2368 2369static int 2370pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) 2371{ 2372 int ctx_flags; 2373 2374 /* valid signal */ 2375 2376 ctx_flags = pfx->ctx_flags; 2377 2378 if (ctx_flags 
& PFM_FL_SYSTEM_WIDE) {
2379
2380 /*
2381 * cannot block in this mode
2382 */
2383 if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
2384 DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
2385 return -EINVAL;
2386 }
2387 }
2388
2389 /* probably more to add here */
2390
2391 return 0;
2392}
2393
2394static int
2395pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags,
2396 unsigned int cpu, pfarg_context_t *arg)
2397{
2398 pfm_buffer_fmt_t *fmt = NULL;
2399 unsigned long size = 0UL;
2400 void *uaddr = NULL;
2401 void *fmt_arg = NULL;
2402 int ret = 0;
2403#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1)
2404
2405 /* invoke and lock buffer format, if found */
2406 fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
2407 if (fmt == NULL) {
2408 DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
2409 return -EINVAL;
2410 }
2411
2412 /*
2413 * buffer argument MUST be contiguous to pfarg_context_t
2414 */
2415 if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);
2416
2417 ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
2418
2419 DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));
2420
2421 if (ret) goto error;
2422
2423 /* link buffer format and context */
2424 ctx->ctx_buf_fmt = fmt;
2425 ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */
2426
2427 /*
2428 * check if buffer format wants to use perfmon buffer allocation/mapping service
2429 */
2430 ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size);
2431 if (ret) goto error;
2432
2433 if (size) {
2434 /*
2435 * buffer is always remapped into the caller's address space
2436 */
2437 ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr);
2438 if (ret) goto error;
2439
2440 /* keep track of user address of buffer */
2441 arg->ctx_smpl_vaddr = uaddr;
2442 }
2443 ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);
2444
2445error:
2446 return ret;
2447}
2448
2449static void
2450pfm_reset_pmu_state(pfm_context_t *ctx)
2451{
2452 int i;
2453
2454 /*
2455 * install reset values for PMC.
2456 */
2457 for (i=1; PMC_IS_LAST(i) == 0; i++) {
2458 if (PMC_IS_IMPL(i) == 0) continue;
2459 ctx->ctx_pmcs[i] = PMC_DFL_VAL(i);
2460 DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i]));
2461 }
2462 /*
2463 * PMD registers are set to 0UL when the context is memset()
2464 */
2465
2466 /*
2467 * On context switch restore, we must restore ALL pmc and ALL pmd even
2468 * when they are not actively used by the task. In UP, the incoming process
2469 * may otherwise pick up left over PMC, PMD state from the previous process.
2470 * As opposed to PMD, stale PMC can cause harm to the incoming
2471 * process because they may change what is being measured.
2472 * Therefore, we must systematically reinstall the entire
2473 * PMC state. In SMP, the same thing is possible on the
2474 * same CPU but also between 2 CPUs.
2475 *
2476 * The problem with PMD is information leaking especially
2477 * to user level when psr.sp=0
2478 *
2479 * There is unfortunately no easy way to avoid this problem
2480 * on either UP or SMP. This definitely slows down the
2481 * pfm_load_regs() function.
2482 */
2483
2484 /*
2485 * bitmask of all PMCs accessible to this context
2486 *
2487 * PMC0 is treated differently.
2488 */
2489 ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
2490
2491 /*
2492 * bitmask of all PMDs that are accessible to this context
2493 */
2494 ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
2495
2496 DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0]));
2497
2498 /*
2499 * useful in case of re-enable after disable
2500 */
2501 ctx->ctx_used_ibrs[0] = 0UL;
2502 ctx->ctx_used_dbrs[0] = 0UL;
2503}
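/*
 * Illustrative user-level sketch (added commentary, not part of the
 * original file): after PFM_CREATE_CONTEXT, every implemented PMC holds
 * the default value installed above, so a tool only overwrites the
 * registers it actually uses. The register number and event encoding
 * below are hypothetical and PMU-model specific; error handling omitted:
 *
 *	pfarg_reg_t pc, pd;
 *
 *	memset(&pc, 0, sizeof(pc));
 *	memset(&pd, 0, sizeof(pd));
 *	pc.reg_num   = 4;			// a counting monitor PMC
 *	pc.reg_value = event_encoding;		// what to count (model specific)
 *	pd.reg_num   = 4;			// the paired PMD counter
 *	pd.reg_value = 0UL;			// start counting from zero
 *	perfmonctl(fd, PFM_WRITE_PMCS, &pc, 1);
 *	perfmonctl(fd, PFM_WRITE_PMDS, &pd, 1);
 */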
2504
2505static int
2506pfm_ctx_getsize(void *arg, size_t *sz)
2507{
2508 pfarg_context_t *req = (pfarg_context_t *)arg;
2509 pfm_buffer_fmt_t *fmt;
2510
2511 *sz = 0;
2512
2513 if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;
2514
2515 fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id);
2516 if (fmt == NULL) {
2517 DPRINT(("cannot find buffer format\n"));
2518 return -EINVAL;
2519 }
2520 /* get just enough to copy in user parameters */
2521 *sz = fmt->fmt_arg_size;
2522 DPRINT(("arg_size=%lu\n", *sz));
2523
2524 return 0;
2525}
2526
2527
2528
2529/*
2530 * cannot attach if :
2531 * - kernel task
2532 * - task not owned by caller
2533 * - task incompatible with context mode
2534 */
2535static int
2536pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
2537{
2538 /*
2539 * no kernel task or task not owned by caller
2540 */
2541 if (task->mm == NULL) {
2542 DPRINT(("task [%d] has no memory context (kernel thread)\n", task_pid_nr(task)));
2543 return -EPERM;
2544 }
2545 if (pfm_bad_permissions(task)) {
2546 DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task)));
2547 return -EPERM;
2548 }
2549 /*
2550 * cannot block in self-monitoring mode
2551 */
2552 if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
2553 DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
2554 return -EINVAL;
2555 }
2556
2557 if (task->exit_state == EXIT_ZOMBIE) {
2558 DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task)));
2559 return -EBUSY;
2560 }
2561
2562 /*
2563 * always ok for self
2564 */
2565 if (task == current) return 0;
2566
2567 if (!task_is_stopped_or_traced(task)) {
2568 DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
2569 return -EBUSY;
2570 }
2571 /*
2572 * make sure the task is off any CPU
2573 */
2574 wait_task_inactive(task, 0);
2575
2576 /* more to come... */
2577
2578 return 0;
2579}
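/*
 * Illustrative user-level sketch (added commentary, not part of the
 * original file): the checks above are what an attach to another task
 * must satisfy; in particular, the target has to be ptrace-stopped
 * before PFM_LOAD_CONTEXT can succeed. A minimal sketch, with error
 * handling omitted:
 *
 *	pfarg_load_t load;
 *
 *	ptrace(PTRACE_ATTACH, pid, NULL, NULL);	// stops the target
 *	waitpid(pid, NULL, 0);
 *	memset(&load, 0, sizeof(load));
 *	load.load_pid = pid;
 *	perfmonctl(fd, PFM_LOAD_CONTEXT, &load, 1);
 */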
2580
2581static int
2582pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
2583{
2584 struct task_struct *p = current;
2585 int ret;
2586
2587 if (pid < 2) return -EPERM;
2588
2589 if (pid != task_pid_vnr(current)) {
2590
2591 read_lock(&tasklist_lock);
2592
2593 p = find_task_by_vpid(pid);
2594
2595 /* make sure task cannot go away while we operate on it */
2596 if (p) get_task_struct(p);
2597
2598 read_unlock(&tasklist_lock);
2599
2600 if (p == NULL) return -ESRCH;
2601 }
2602
2603 ret = pfm_task_incompatible(ctx, p);
2604 if (ret == 0) {
2605 *task = p;
2606 } else if (p != current) {
2607 pfm_put_task(p);
2608 }
2609 return ret;
2610}
2611
2612
2613
2614static int
2615pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
2616{
2617 pfarg_context_t *req = (pfarg_context_t *)arg;
2618 struct file *filp;
2619 struct path path;
2620 int ctx_flags;
2621 int fd;
2622 int ret;
2623
2624 /* let's check the arguments first */
2625 ret = pfarg_is_sane(current, req);
2626 if (ret < 0)
2627 return ret;
2628
2629 ctx_flags = req->ctx_flags;
2630
2631 ret = -ENOMEM;
2632
2633 fd = get_unused_fd();
2634 if (fd < 0)
2635 return fd;
2636
2637 ctx = pfm_context_alloc(ctx_flags);
2638 if (!ctx)
2639 goto error;
2640
2641 filp = pfm_alloc_file(ctx);
2642 if (IS_ERR(filp)) {
2643 ret = PTR_ERR(filp);
2644 goto error_file;
2645 }
2646
2647 req->ctx_fd = ctx->ctx_fd = fd;
2648
2649 /*
2650 * does the user want to sample?
2651 */
2652 if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
2653 ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req);
2654 if (ret)
2655 goto buffer_error;
2656 }
2657
2658 DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n",
2659 ctx,
2660 ctx_flags,
2661 ctx->ctx_fl_system,
2662 ctx->ctx_fl_block,
2663 ctx->ctx_fl_excl_idle,
2664 ctx->ctx_fl_no_msg,
2665 ctx->ctx_fd));
2666
2667 /*
2668 * initialize soft PMU state
2669 */
2670 pfm_reset_pmu_state(ctx);
2671
2672 fd_install(fd, filp);
2673
2674 return 0;
2675
2676buffer_error:
2677 path = filp->f_path;
2678 put_filp(filp);
2679 path_put(&path);
2680
2681 if (ctx->ctx_buf_fmt) {
2682 pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
2683 }
2684error_file:
2685 pfm_context_free(ctx);
2686
2687error:
2688 put_unused_fd(fd);
2689 return ret;
2690}
2691
2692static inline unsigned long
2693pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
2694{
2695 unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
2696 unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
2697 extern unsigned long carta_random32 (unsigned long seed);
2698
2699 if (reg->flags & PFM_REGFL_RANDOM) {
2700 new_seed = carta_random32(old_seed);
2701 val -= (old_seed & mask); /* counter values are negative numbers!
*/ 2702 if ((mask >> 32) != 0) 2703 /* construct a full 64-bit random value: */ 2704 new_seed |= carta_random32(old_seed >> 32) << 32; 2705 reg->seed = new_seed; 2706 } 2707 reg->lval = val; 2708 return val; 2709} 2710 2711static void 2712pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2713{ 2714 unsigned long mask = ovfl_regs[0]; 2715 unsigned long reset_others = 0UL; 2716 unsigned long val; 2717 int i; 2718 2719 /* 2720 * now restore reset value on sampling overflowed counters 2721 */ 2722 mask >>= PMU_FIRST_COUNTER; 2723 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2724 2725 if ((mask & 0x1UL) == 0UL) continue; 2726 2727 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2728 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2729 2730 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2731 } 2732 2733 /* 2734 * Now take care of resetting the other registers 2735 */ 2736 for(i = 0; reset_others; i++, reset_others >>= 1) { 2737 2738 if ((reset_others & 0x1) == 0) continue; 2739 2740 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2741 2742 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2743 is_long_reset ? "long" : "short", i, val)); 2744 } 2745} 2746 2747static void 2748pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2749{ 2750 unsigned long mask = ovfl_regs[0]; 2751 unsigned long reset_others = 0UL; 2752 unsigned long val; 2753 int i; 2754 2755 DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); 2756 2757 if (ctx->ctx_state == PFM_CTX_MASKED) { 2758 pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); 2759 return; 2760 } 2761 2762 /* 2763 * now restore reset value on sampling overflowed counters 2764 */ 2765 mask >>= PMU_FIRST_COUNTER; 2766 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2767 2768 if ((mask & 0x1UL) == 0UL) continue; 2769 2770 val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2771 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2772 2773 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2774 2775 pfm_write_soft_counter(ctx, i, val); 2776 } 2777 2778 /* 2779 * Now take care of resetting the other registers 2780 */ 2781 for(i = 0; reset_others; i++, reset_others >>= 1) { 2782 2783 if ((reset_others & 0x1) == 0) continue; 2784 2785 val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2786 2787 if (PMD_IS_COUNTING(i)) { 2788 pfm_write_soft_counter(ctx, i, val); 2789 } else { 2790 ia64_set_pmd(i, val); 2791 } 2792 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2793 is_long_reset ? "long" : "short", i, val)); 2794 } 2795 ia64_srlz_d(); 2796} 2797 2798static int 2799pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2800{ 2801 struct task_struct *task; 2802 pfarg_reg_t *req = (pfarg_reg_t *)arg; 2803 unsigned long value, pmc_pm; 2804 unsigned long smpl_pmds, reset_pmds, impl_pmds; 2805 unsigned int cnum, reg_flags, flags, pmc_type; 2806 int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; 2807 int is_monitor, is_counting, state; 2808 int ret = -EINVAL; 2809 pfm_reg_check_t wr_func; 2810#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) 2811 2812 state = ctx->ctx_state; 2813 is_loaded = state == PFM_CTX_LOADED ? 
1 : 0;
2814 is_system = ctx->ctx_fl_system;
2815 task = ctx->ctx_task;
2816 impl_pmds = pmu_conf->impl_pmds[0];
2817
2818 if (state == PFM_CTX_ZOMBIE) return -EINVAL;
2819
2820 if (is_loaded) {
2821 /*
2822 * In system wide and when the context is loaded, access can only happen
2823 * when the caller is running on the CPU being monitored by the session.
2824 * It does not have to be the owner (ctx_task) of the context per se.
2825 */
2826 if (is_system && ctx->ctx_cpu != smp_processor_id()) {
2827 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
2828 return -EBUSY;
2829 }
2830 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
2831 }
2832 expert_mode = pfm_sysctl.expert_mode;
2833
2834 for (i = 0; i < count; i++, req++) {
2835
2836 cnum = req->reg_num;
2837 reg_flags = req->reg_flags;
2838 value = req->reg_value;
2839 smpl_pmds = req->reg_smpl_pmds[0];
2840 reset_pmds = req->reg_reset_pmds[0];
2841 flags = 0;
2842
2843
2844 if (cnum >= PMU_MAX_PMCS) {
2845 DPRINT(("pmc%u is invalid\n", cnum));
2846 goto error;
2847 }
2848
2849 pmc_type = pmu_conf->pmc_desc[cnum].type;
2850 pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
2851 is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
2852 is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
2853
2854 /*
2855 * we reject all non-implemented PMCs as well
2856 * as attempts to modify PMC[0-3], which are used
2857 * as status registers by the PMU
2858 */
2859 if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
2860 DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
2861 goto error;
2862 }
2863 wr_func = pmu_conf->pmc_desc[cnum].write_check;
2864 /*
2865 * If the PMC is a monitor, then if the value is not the default:
2866 * - system-wide session: PMCx.pm=1 (privileged monitor)
2867 * - per-task : PMCx.pm=0 (user monitor)
2868 */
2869 if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
2870 DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
2871 cnum,
2872 pmc_pm,
2873 is_system));
2874 goto error;
2875 }
2876
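/*
 * Added commentary on the pmc.pm check above: "is_system ^ pmc_pm"
 * rejects a non-default monitor whose privilege bit does not match the
 * session type. As a truth table:
 *
 *	is_system  pmc_pm  accepted?
 *	    0        0     yes  (per-task session, user monitor)
 *	    0        1     no   (per-task session, privileged monitor)
 *	    1        0     no   (system-wide session, user monitor)
 *	    1        1     yes  (system-wide session, privileged monitor)
 *
 * i.e. system-wide sessions must use privileged monitors (pmc.pm=1) and
 * per-task sessions must use user monitors (pmc.pm=0), as the comment
 * before the check states.
 */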
2877 if (is_counting) {
2878 /*
2879 * enforce generation of overflow interrupt. Necessary on all
2880 * CPUs.
2881 */
2882 value |= 1 << PMU_PMC_OI;
2883
2884 if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
2885 flags |= PFM_REGFL_OVFL_NOTIFY;
2886 }
2887
2888 if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
2889
2890 /* verify validity of smpl_pmds */
2891 if ((smpl_pmds & impl_pmds) != smpl_pmds) {
2892 DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
2893 goto error;
2894 }
2895
2896 /* verify validity of reset_pmds */
2897 if ((reset_pmds & impl_pmds) != reset_pmds) {
2898 DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
2899 goto error;
2900 }
2901 } else {
2902 if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
2903 DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
2904 goto error;
2905 }
2906 /* eventid on non-counting monitors is ignored */
2907 }
2908
2909 /*
2910 * execute write checker, if any
2911 */
2912 if (likely(expert_mode == 0 && wr_func)) {
2913 ret = (*wr_func)(task, ctx, cnum, &value, regs);
2914 if (ret) goto error;
2915 ret = -EINVAL;
2916 }
2917
2918 /*
2919 * no error on this register
2920 */
2921 PFM_REG_RETFLAG_SET(req->reg_flags, 0);
2922
2923 /*
2924 * Now we commit the changes to the software state
2925 */
2926
2927 /*
2928 * update overflow information
2929 */
2930 if (is_counting) {
2931 /*
2932 * full flag update each time a register is programmed
2933 */
2934 ctx->ctx_pmds[cnum].flags = flags;
2935
2936 ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
2937 ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds;
2938 ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid;
2939
2940 /*
2941 * Mark all PMDS to be accessed as used.
2942 *
2943 * We do not keep track of PMC because we have to
2944 * systematically restore ALL of them.
2945 *
2946 * We do not update the used_monitors mask, because
2947 * if we have not programmed them, then they will be in
2948 * a quiescent state, therefore we will not need to
2949 * mask/restore them when the context is MASKED.
2950 */
2951 CTX_USED_PMD(ctx, reset_pmds);
2952 CTX_USED_PMD(ctx, smpl_pmds);
2953 /*
2954 * make sure we do not try to reset on
2955 * restart because we have established new values
2956 */
2957 if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
2958 }
2959 /*
2960 * Needed in case the user does not initialize the equivalent
2961 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
2962 * possible leak here.
2963 */
2964 CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);
2965
2966 /*
2967 * keep track of the monitor PMC that we are using.
2968 * we save the value of the pmc in ctx_pmcs[] and if
2969 * the monitoring is not stopped for the context we also
2970 * place it in the saved state area so that it will be
2971 * picked up later by the context switch code.
2972 *
2973 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
2974 *
2975 * The value in th_pmcs[] may be modified on overflow, i.e., when
2976 * monitoring needs to be stopped.
2977 */ 2978 if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); 2979 2980 /* 2981 * update context state 2982 */ 2983 ctx->ctx_pmcs[cnum] = value; 2984 2985 if (is_loaded) { 2986 /* 2987 * write thread state 2988 */ 2989 if (is_system == 0) ctx->th_pmcs[cnum] = value; 2990 2991 /* 2992 * write hardware register if we can 2993 */ 2994 if (can_access_pmu) { 2995 ia64_set_pmc(cnum, value); 2996 } 2997#ifdef CONFIG_SMP 2998 else { 2999 /* 3000 * per-task SMP only here 3001 * 3002 * we are guaranteed that the task is not running on the other CPU, 3003 * we indicate that this PMD will need to be reloaded if the task 3004 * is rescheduled on the CPU it ran last on. 3005 */ 3006 ctx->ctx_reload_pmcs[0] |= 1UL << cnum; 3007 } 3008#endif 3009 } 3010 3011 DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", 3012 cnum, 3013 value, 3014 is_loaded, 3015 can_access_pmu, 3016 flags, 3017 ctx->ctx_all_pmcs[0], 3018 ctx->ctx_used_pmds[0], 3019 ctx->ctx_pmds[cnum].eventid, 3020 smpl_pmds, 3021 reset_pmds, 3022 ctx->ctx_reload_pmcs[0], 3023 ctx->ctx_used_monitors[0], 3024 ctx->ctx_ovfl_regs[0])); 3025 } 3026 3027 /* 3028 * make sure the changes are visible 3029 */ 3030 if (can_access_pmu) ia64_srlz_d(); 3031 3032 return 0; 3033error: 3034 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3035 return ret; 3036} 3037 3038static int 3039pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3040{ 3041 struct task_struct *task; 3042 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3043 unsigned long value, hw_value, ovfl_mask; 3044 unsigned int cnum; 3045 int i, can_access_pmu = 0, state; 3046 int is_counting, is_loaded, is_system, expert_mode; 3047 int ret = -EINVAL; 3048 pfm_reg_check_t wr_func; 3049 3050 3051 state = ctx->ctx_state; 3052 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3053 is_system = ctx->ctx_fl_system; 3054 ovfl_mask = pmu_conf->ovfl_val; 3055 task = ctx->ctx_task; 3056 3057 if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; 3058 3059 /* 3060 * on both UP and SMP, we can only write to the PMC when the task is 3061 * the owner of the local PMU. 3062 */ 3063 if (likely(is_loaded)) { 3064 /* 3065 * In system wide and when the context is loaded, access can only happen 3066 * when the caller is running on the CPU being monitored by the session. 3067 * It does not have to be the owner (ctx_task) of the context per se. 3068 */ 3069 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3070 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3071 return -EBUSY; 3072 } 3073 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; 3074 } 3075 expert_mode = pfm_sysctl.expert_mode; 3076 3077 for (i = 0; i < count; i++, req++) { 3078 3079 cnum = req->reg_num; 3080 value = req->reg_value; 3081 3082 if (!PMD_IS_IMPL(cnum)) { 3083 DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); 3084 goto abort_mission; 3085 } 3086 is_counting = PMD_IS_COUNTING(cnum); 3087 wr_func = pmu_conf->pmd_desc[cnum].write_check; 3088 3089 /* 3090 * execute write checker, if any 3091 */ 3092 if (unlikely(expert_mode == 0 && wr_func)) { 3093 unsigned long v = value; 3094 3095 ret = (*wr_func)(task, ctx, cnum, &v, regs); 3096 if (ret) goto abort_mission; 3097 3098 value = v; 3099 ret = -EINVAL; 3100 } 3101 3102 /* 3103 * no error on this register 3104 */ 3105 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 3106 3107 /* 3108 * now commit changes to software state 3109 */ 3110 hw_value = value; 3111 3112 /* 3113 * update virtualized (64bits) counter 3114 */ 3115 if (is_counting) { 3116 /* 3117 * write context state 3118 */ 3119 ctx->ctx_pmds[cnum].lval = value; 3120 3121 /* 3122 * when context is load we use the split value 3123 */ 3124 if (is_loaded) { 3125 hw_value = value & ovfl_mask; 3126 value = value & ~ovfl_mask; 3127 } 3128 } 3129 /* 3130 * update reset values (not just for counters) 3131 */ 3132 ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; 3133 ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; 3134 3135 /* 3136 * update randomization parameters (not just for counters) 3137 */ 3138 ctx->ctx_pmds[cnum].seed = req->reg_random_seed; 3139 ctx->ctx_pmds[cnum].mask = req->reg_random_mask; 3140 3141 /* 3142 * update context value 3143 */ 3144 ctx->ctx_pmds[cnum].val = value; 3145 3146 /* 3147 * Keep track of what we use 3148 * 3149 * We do not keep track of PMC because we have to 3150 * systematically restore ALL of them. 3151 */ 3152 CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); 3153 3154 /* 3155 * mark this PMD register used as well 3156 */ 3157 CTX_USED_PMD(ctx, RDEP(cnum)); 3158 3159 /* 3160 * make sure we do not try to reset on 3161 * restart because we have established new values 3162 */ 3163 if (is_counting && state == PFM_CTX_MASKED) { 3164 ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 3165 } 3166 3167 if (is_loaded) { 3168 /* 3169 * write thread state 3170 */ 3171 if (is_system == 0) ctx->th_pmds[cnum] = hw_value; 3172 3173 /* 3174 * write hardware register if we can 3175 */ 3176 if (can_access_pmu) { 3177 ia64_set_pmd(cnum, hw_value); 3178 } else { 3179#ifdef CONFIG_SMP 3180 /* 3181 * we are guaranteed that the task is not running on the other CPU, 3182 * we indicate that this PMD will need to be reloaded if the task 3183 * is rescheduled on the CPU it ran last on. 3184 */ 3185 ctx->ctx_reload_pmds[0] |= 1UL << cnum; 3186#endif 3187 } 3188 } 3189 3190 DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " 3191 "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", 3192 cnum, 3193 value, 3194 is_loaded, 3195 can_access_pmu, 3196 hw_value, 3197 ctx->ctx_pmds[cnum].val, 3198 ctx->ctx_pmds[cnum].short_reset, 3199 ctx->ctx_pmds[cnum].long_reset, 3200 PMC_OVFL_NOTIFY(ctx, cnum) ? 
'Y':'N',
3201 ctx->ctx_pmds[cnum].seed,
3202 ctx->ctx_pmds[cnum].mask,
3203 ctx->ctx_used_pmds[0],
3204 ctx->ctx_pmds[cnum].reset_pmds[0],
3205 ctx->ctx_reload_pmds[0],
3206 ctx->ctx_all_pmds[0],
3207 ctx->ctx_ovfl_regs[0]));
3208 }
3209
3210 /*
3211 * make changes visible
3212 */
3213 if (can_access_pmu) ia64_srlz_d();
3214
3215 return 0;
3216
3217abort_mission:
3218 /*
3219 * for now, we have only one possibility for error
3220 */
3221 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
3222 return ret;
3223}
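/*
 * Worked example of the 64-bit counter virtualization used above (added
 * commentary): assume, for illustration, a PMU with 47-bit counters, so
 * ovfl_mask = pmu_conf->ovfl_val = (1UL << 47) - 1. pfm_write_pmds()
 * then splits a 64-bit user value into
 *
 *	hw_value = value &  ovfl_mask;	// programmed into the PMD register
 *	soft     = value & ~ovfl_mask;	// kept in ctx->ctx_pmds[cnum].val
 *
 * and pfm_read_pmds() below reconstructs the full 64-bit count as
 *
 *	val = (hw_value & ovfl_mask) + soft;
 *
 * Hardware overflows of the narrow counter are folded into the soft part
 * by the overflow handler, so user level only ever sees a full 64-bit
 * counter. The 47-bit width is an assumption for illustration; the real
 * width comes from the PMU description.
 */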
3224
3225/*
3226 * By way of PROTECT_CTX(), interrupts are masked while we are in this function.
3227 * Therefore we know we do not have to worry about the PMU overflow interrupt. If an
3228 * interrupt is delivered during the call, it will be kept pending until we leave, making
3229 * it appear as if it had been generated at the UNPROTECT_CTX(). At least we are
3230 * guaranteed to return consistent data to the user, though it may simply be old. It is not
3231 * trivial to treat the overflow while inside the call because you may end up in
3232 * some module sampling buffer code causing deadlocks.
3233 */
3234static int
3235pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3236{
3237 struct task_struct *task;
3238 unsigned long val = 0UL, lval, ovfl_mask, sval;
3239 pfarg_reg_t *req = (pfarg_reg_t *)arg;
3240 unsigned int cnum, reg_flags = 0;
3241 int i, can_access_pmu = 0, state;
3242 int is_loaded, is_system, is_counting, expert_mode;
3243 int ret = -EINVAL;
3244 pfm_reg_check_t rd_func;
3245
3246 /*
3247 * access is possible when loaded only for
3248 * self-monitoring tasks or in UP mode
3249 */
3250
3251 state = ctx->ctx_state;
3252 is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
3253 is_system = ctx->ctx_fl_system;
3254 ovfl_mask = pmu_conf->ovfl_val;
3255 task = ctx->ctx_task;
3256
3257 if (state == PFM_CTX_ZOMBIE) return -EINVAL;
3258
3259 if (likely(is_loaded)) {
3260 /*
3261 * In system wide and when the context is loaded, access can only happen
3262 * when the caller is running on the CPU being monitored by the session.
3263 * It does not have to be the owner (ctx_task) of the context per se.
3264 */
3265 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
3266 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3267 return -EBUSY;
3268 }
3269 /*
3270 * this can be true when not self-monitoring only in UP
3271 */
3272 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
3273
3274 if (can_access_pmu) ia64_srlz_d();
3275 }
3276 expert_mode = pfm_sysctl.expert_mode;
3277
3278 DPRINT(("ld=%d apmu=%d ctx_state=%d\n",
3279 is_loaded,
3280 can_access_pmu,
3281 state));
3282
3283 /*
3284 * on both UP and SMP, we can only read the PMD from the hardware register when
3285 * the task is the owner of the local PMU.
3286 */
3287
3288 for (i = 0; i < count; i++, req++) {
3289
3290 cnum = req->reg_num;
3291 reg_flags = req->reg_flags;
3292
3293 if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
3294 /*
3295 * we can only read the register that we use. That includes
3296 * the one we explicitly initialize AND the one we want included
3297 * in the sampling buffer (smpl_regs).
3298 *
3299 * Having this restriction allows optimization in the ctxsw routine
3300 * without compromising security (leaks)
3301 */
3302 if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;
3303
3304 sval = ctx->ctx_pmds[cnum].val;
3305 lval = ctx->ctx_pmds[cnum].lval;
3306 is_counting = PMD_IS_COUNTING(cnum);
3307
3308 /*
3309 * If the task is not the current one, then we check if the
3310 * PMU state is still in the local live register due to lazy ctxsw.
3311 * If true, then we read directly from the registers.
3312 */
3313 if (can_access_pmu) {
3314 val = ia64_get_pmd(cnum);
3315 } else {
3316 /*
3317 * context has been saved
3318 * if context is zombie, then task does not exist anymore.
3319 * In this case, we use the full value saved in the context (pfm_flush_regs()).
3320 */
3321 val = is_loaded ? ctx->th_pmds[cnum] : 0UL;
3322 }
3323 rd_func = pmu_conf->pmd_desc[cnum].read_check;
3324
3325 if (is_counting) {
3326 val &= ovfl_mask;
3327 val += sval;
3328 }
3329
3330 /*
3331 * execute read checker, if any
3332 */
3333 if (unlikely(expert_mode == 0 && rd_func)) {
3334 unsigned long v = val;
3335 ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
3336 if (ret) goto error;
3337 val = v;
3338 ret = -EINVAL;
3339 }
3340
3341 PFM_REG_RETFLAG_SET(reg_flags, 0);
3342
3343 DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
3344
3345 /*
3346 * update register return value, abort all if problem during copy.
3347 * we only modify the reg_flags field. no check mode is fine because
3348 * access has been verified upfront in sys_perfmonctl().
3349 */
3350 req->reg_value = val;
3351 req->reg_flags = reg_flags;
3352 req->reg_last_reset_val = lval;
3353 }
3354
3355 return 0;
3356
3357error:
3358 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
3359 return ret;
3360}
3361
3362int
3363pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
3364{
3365 pfm_context_t *ctx;
3366
3367 if (req == NULL) return -EINVAL;
3368
3369 ctx = GET_PMU_CTX();
3370
3371 if (ctx == NULL) return -EINVAL;
3372
3373 /*
3374 * for now limit to current task, which is enough when calling
3375 * from overflow handler
3376 */
3377 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
3378
3379 return pfm_write_pmcs(ctx, req, nreq, regs);
3380}
3381EXPORT_SYMBOL(pfm_mod_write_pmcs);
3382
3383int
3384pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
3385{
3386 pfm_context_t *ctx;
3387
3388 if (req == NULL) return -EINVAL;
3389
3390 ctx = GET_PMU_CTX();
3391
3392 if (ctx == NULL) return -EINVAL;
3393
3394 /*
3395 * for now limit to current task, which is enough when calling
3396 * from overflow handler
3397 */
3398 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
3399
3400 return pfm_read_pmds(ctx, req, nreq, regs);
3401}
3402EXPORT_SYMBOL(pfm_mod_read_pmds);
3403
3404/*
3405 * Only call this function when a process is trying to
3406 * write the debug registers (reading is always allowed)
3407 */
3408int
3409pfm_use_debug_registers(struct task_struct *task)
3410{
3411 pfm_context_t *ctx = task->thread.pfm_context;
3412 unsigned long flags;
3413 int ret = 0;
3414
3415 if (pmu_conf->use_rr_dbregs == 0) return 0;
3416
3417 DPRINT(("called for [%d]\n", task_pid_nr(task)));
3418
3419 /*
3420 * do it only once
3421 */
3422 if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
3423
3424 /*
3425 * Even on SMP, we do not need to use an atomic here because
3426 * the only way in is via ptrace() and this is possible only when the
3427 * process is stopped. Even in the case where the ctxsw out is not totally 3428 * completed by the time we come here, there is no way the 'stopped' process 3429 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. 3430 * So this is always safe. 3431 */ 3432 if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; 3433 3434 LOCK_PFS(flags); 3435 3436 /* 3437 * We cannot allow setting breakpoints when system wide monitoring 3438 * sessions are using the debug registers. 3439 */ 3440 if (pfm_sessions.pfs_sys_use_dbregs> 0) 3441 ret = -1; 3442 else 3443 pfm_sessions.pfs_ptrace_use_dbregs++; 3444 3445 DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", 3446 pfm_sessions.pfs_ptrace_use_dbregs, 3447 pfm_sessions.pfs_sys_use_dbregs, 3448 task_pid_nr(task), ret)); 3449 3450 UNLOCK_PFS(flags); 3451 3452 return ret; 3453} 3454 3455/* 3456 * This function is called for every task that exits with the 3457 * IA64_THREAD_DBG_VALID set. This indicates a task which was 3458 * able to use the debug registers for debugging purposes via 3459 * ptrace(). Therefore we know it was not using them for 3460 * performance monitoring, so we only decrement the number 3461 * of "ptraced" debug register users to keep the count up to date 3462 */ 3463int 3464pfm_release_debug_registers(struct task_struct *task) 3465{ 3466 unsigned long flags; 3467 int ret; 3468 3469 if (pmu_conf->use_rr_dbregs == 0) return 0; 3470 3471 LOCK_PFS(flags); 3472 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { 3473 printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task)); 3474 ret = -1; 3475 } else { 3476 pfm_sessions.pfs_ptrace_use_dbregs--; 3477 ret = 0; 3478 } 3479 UNLOCK_PFS(flags); 3480 3481 return ret; 3482} 3483 3484static int 3485pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3486{ 3487 struct task_struct *task; 3488 pfm_buffer_fmt_t *fmt; 3489 pfm_ovfl_ctrl_t rst_ctrl; 3490 int state, is_system; 3491 int ret = 0; 3492 3493 state = ctx->ctx_state; 3494 fmt = ctx->ctx_buf_fmt; 3495 is_system = ctx->ctx_fl_system; 3496 task = PFM_CTX_TASK(ctx); 3497 3498 switch(state) { 3499 case PFM_CTX_MASKED: 3500 break; 3501 case PFM_CTX_LOADED: 3502 if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; 3503 /* fall through */ 3504 case PFM_CTX_UNLOADED: 3505 case PFM_CTX_ZOMBIE: 3506 DPRINT(("invalid state=%d\n", state)); 3507 return -EBUSY; 3508 default: 3509 DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); 3510 return -EINVAL; 3511 } 3512 3513 /* 3514 * In system wide and when the context is loaded, access can only happen 3515 * when the caller is running on the CPU being monitored by the session. 3516 * It does not have to be the owner (ctx_task) of the context per se. 
3517 */ 3518 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3519 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3520 return -EBUSY; 3521 } 3522 3523 /* sanity check */ 3524 if (unlikely(task == NULL)) { 3525 printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current)); 3526 return -EINVAL; 3527 } 3528 3529 if (task == current || is_system) { 3530 3531 fmt = ctx->ctx_buf_fmt; 3532 3533 DPRINT(("restarting self %d ovfl=0x%lx\n", 3534 task_pid_nr(task), 3535 ctx->ctx_ovfl_regs[0])); 3536 3537 if (CTX_HAS_SMPL(ctx)) { 3538 3539 prefetch(ctx->ctx_smpl_hdr); 3540 3541 rst_ctrl.bits.mask_monitoring = 0; 3542 rst_ctrl.bits.reset_ovfl_pmds = 0; 3543 3544 if (state == PFM_CTX_LOADED) 3545 ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3546 else 3547 ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3548 } else { 3549 rst_ctrl.bits.mask_monitoring = 0; 3550 rst_ctrl.bits.reset_ovfl_pmds = 1; 3551 } 3552 3553 if (ret == 0) { 3554 if (rst_ctrl.bits.reset_ovfl_pmds) 3555 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); 3556 3557 if (rst_ctrl.bits.mask_monitoring == 0) { 3558 DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task))); 3559 3560 if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); 3561 } else { 3562 DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task))); 3563 3564 // cannot use pfm_stop_monitoring(task, regs); 3565 } 3566 } 3567 /* 3568 * clear overflowed PMD mask to remove any stale information 3569 */ 3570 ctx->ctx_ovfl_regs[0] = 0UL; 3571 3572 /* 3573 * back to LOADED state 3574 */ 3575 ctx->ctx_state = PFM_CTX_LOADED; 3576 3577 ctx->ctx_fl_can_restart = 0; 3578 3579 return 0; 3580 } 3581 3582 /* 3583 * restart another task 3584 */ 3585 3586 /* 3587 * When PFM_CTX_MASKED, we cannot issue a restart before the previous 3588 * one is seen by the task. 3589 */ 3590 if (state == PFM_CTX_MASKED) { 3591 if (ctx->ctx_fl_can_restart == 0) return -EINVAL; 3592 /* 3593 * will prevent subsequent restart before this one is 3594 * seen by other task 3595 */ 3596 ctx->ctx_fl_can_restart = 0; 3597 } 3598 3599 /* 3600 * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. 3601 * the task is blocked or on its way to block. That's the normal 3602 * restart path. If the monitoring is not masked, then the task 3603 * can be actively monitoring and we cannot directly intervene. 3604 * Therefore we use the trap mechanism to catch the task and 3605 * force it to reset the buffer/reset PMDs. 3606 * 3607 * if non-blocking, then we ensure that the task will go into 3608 * pfm_handle_work() before returning to user mode. 3609 * 3610 * We cannot explicitly reset another task, it MUST always 3611 * be done by the task itself. This works for system wide because 3612 * the tool that is controlling the session is logically doing 3613 * "self-monitoring". 3614 */ 3615 if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { 3616 DPRINT(("unblocking [%d]\n", task_pid_nr(task))); 3617 complete(&ctx->ctx_restart_done); 3618 } else { 3619 DPRINT(("[%d] armed exit trap\n", task_pid_nr(task))); 3620 3621 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; 3622 3623 PFM_SET_WORK_PENDING(task, 1); 3624 3625 set_notify_resume(task); 3626 3627 } 3628 return 0; 3629} 3630 3631static int 3632pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3633{ 3634 unsigned int m = *(unsigned int *)arg; 3635 3636 pfm_sysctl.debug = m == 0 ? 
0 : 1; 3637 3638 printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); 3639 3640 if (m == 0) { 3641 memset(pfm_stats, 0, sizeof(pfm_stats)); 3642 for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; 3643 } 3644 return 0; 3645} 3646 3647/* 3648 * arg can be NULL and count can be zero for this function 3649 */ 3650static int 3651pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3652{ 3653 struct thread_struct *thread = NULL; 3654 struct task_struct *task; 3655 pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; 3656 unsigned long flags; 3657 dbreg_t dbreg; 3658 unsigned int rnum; 3659 int first_time; 3660 int ret = 0, state; 3661 int i, can_access_pmu = 0; 3662 int is_system, is_loaded; 3663 3664 if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; 3665 3666 state = ctx->ctx_state; 3667 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3668 is_system = ctx->ctx_fl_system; 3669 task = ctx->ctx_task; 3670 3671 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3672 3673 /* 3674 * on both UP and SMP, we can only write to the PMC when the task is 3675 * the owner of the local PMU. 3676 */ 3677 if (is_loaded) { 3678 thread = &task->thread; 3679 /* 3680 * In system wide and when the context is loaded, access can only happen 3681 * when the caller is running on the CPU being monitored by the session. 3682 * It does not have to be the owner (ctx_task) of the context per se. 3683 */ 3684 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3685 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3686 return -EBUSY; 3687 } 3688 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3689 } 3690 3691 /* 3692 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w 3693 * ensuring that no real breakpoint can be installed via this call. 3694 * 3695 * IMPORTANT: regs can be NULL in this function 3696 */ 3697 3698 first_time = ctx->ctx_fl_using_dbreg == 0; 3699 3700 /* 3701 * don't bother if we are loaded and task is being debugged 3702 */ 3703 if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { 3704 DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task))); 3705 return -EBUSY; 3706 } 3707 3708 /* 3709 * check for debug registers in system wide mode 3710 * 3711 * If though a check is done in pfm_context_load(), 3712 * we must repeat it here, in case the registers are 3713 * written after the context is loaded 3714 */ 3715 if (is_loaded) { 3716 LOCK_PFS(flags); 3717 3718 if (first_time && is_system) { 3719 if (pfm_sessions.pfs_ptrace_use_dbregs) 3720 ret = -EBUSY; 3721 else 3722 pfm_sessions.pfs_sys_use_dbregs++; 3723 } 3724 UNLOCK_PFS(flags); 3725 } 3726 3727 if (ret != 0) return ret; 3728 3729 /* 3730 * mark ourself as user of the debug registers for 3731 * perfmon purposes. 3732 */ 3733 ctx->ctx_fl_using_dbreg = 1; 3734 3735 /* 3736 * clear hardware registers to make sure we don't 3737 * pick up stale state. 
3738 * 3739 * for a system wide session, we do not use 3740 * thread.dbr, thread.ibr because this process 3741 * never leaves the current CPU and the state 3742 * is shared by all processes running on it 3743 */ 3744 if (first_time && can_access_pmu) { 3745 DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task))); 3746 for (i=0; i < pmu_conf->num_ibrs; i++) { 3747 ia64_set_ibr(i, 0UL); 3748 ia64_dv_serialize_instruction(); 3749 } 3750 ia64_srlz_i(); 3751 for (i=0; i < pmu_conf->num_dbrs; i++) { 3752 ia64_set_dbr(i, 0UL); 3753 ia64_dv_serialize_data(); 3754 } 3755 ia64_srlz_d(); 3756 } 3757 3758 /* 3759 * Now install the values into the registers 3760 */ 3761 for (i = 0; i < count; i++, req++) { 3762 3763 rnum = req->dbreg_num; 3764 dbreg.val = req->dbreg_value; 3765 3766 ret = -EINVAL; 3767 3768 if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { 3769 DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", 3770 rnum, dbreg.val, mode, i, count)); 3771 3772 goto abort_mission; 3773 } 3774 3775 /* 3776 * make sure we do not install enabled breakpoint 3777 */ 3778 if (rnum & 0x1) { 3779 if (mode == PFM_CODE_RR) 3780 dbreg.ibr.ibr_x = 0; 3781 else 3782 dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; 3783 } 3784 3785 PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); 3786 3787 /* 3788 * Debug registers, just like PMC, can only be modified 3789 * by a kernel call. Moreover, perfmon() access to those 3790 * registers are centralized in this routine. The hardware 3791 * does not modify the value of these registers, therefore, 3792 * if we save them as they are written, we can avoid having 3793 * to save them on context switch out. This is made possible 3794 * by the fact that when perfmon uses debug registers, ptrace() 3795 * won't be able to modify them concurrently. 
3796 */ 3797 if (mode == PFM_CODE_RR) { 3798 CTX_USED_IBR(ctx, rnum); 3799 3800 if (can_access_pmu) { 3801 ia64_set_ibr(rnum, dbreg.val); 3802 ia64_dv_serialize_instruction(); 3803 } 3804 3805 ctx->ctx_ibrs[rnum] = dbreg.val; 3806 3807 DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", 3808 rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); 3809 } else { 3810 CTX_USED_DBR(ctx, rnum); 3811 3812 if (can_access_pmu) { 3813 ia64_set_dbr(rnum, dbreg.val); 3814 ia64_dv_serialize_data(); 3815 } 3816 ctx->ctx_dbrs[rnum] = dbreg.val; 3817 3818 DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", 3819 rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); 3820 } 3821 } 3822 3823 return 0; 3824 3825abort_mission: 3826 /* 3827 * in case it was our first attempt, we undo the global modifications 3828 */ 3829 if (first_time) { 3830 LOCK_PFS(flags); 3831 if (ctx->ctx_fl_system) { 3832 pfm_sessions.pfs_sys_use_dbregs--; 3833 } 3834 UNLOCK_PFS(flags); 3835 ctx->ctx_fl_using_dbreg = 0; 3836 } 3837 /* 3838 * install error return flag 3839 */ 3840 PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); 3841 3842 return ret; 3843} 3844 3845static int 3846pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3847{ 3848 return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); 3849} 3850 3851static int 3852pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3853{ 3854 return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); 3855} 3856 3857int 3858pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3859{ 3860 pfm_context_t *ctx; 3861 3862 if (req == NULL) return -EINVAL; 3863 3864 ctx = GET_PMU_CTX(); 3865 3866 if (ctx == NULL) return -EINVAL; 3867 3868 /* 3869 * for now limit to current task, which is enough when calling 3870 * from overflow handler 3871 */ 3872 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3873 3874 return pfm_write_ibrs(ctx, req, nreq, regs); 3875} 3876EXPORT_SYMBOL(pfm_mod_write_ibrs); 3877 3878int 3879pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3880{ 3881 pfm_context_t *ctx; 3882 3883 if (req == NULL) return -EINVAL; 3884 3885 ctx = GET_PMU_CTX(); 3886 3887 if (ctx == NULL) return -EINVAL; 3888 3889 /* 3890 * for now limit to current task, which is enough when calling 3891 * from overflow handler 3892 */ 3893 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3894 3895 return pfm_write_dbrs(ctx, req, nreq, regs); 3896} 3897EXPORT_SYMBOL(pfm_mod_write_dbrs); 3898 3899 3900static int 3901pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3902{ 3903 pfarg_features_t *req = (pfarg_features_t *)arg; 3904 3905 req->ft_version = PFM_VERSION; 3906 return 0; 3907} 3908 3909static int 3910pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3911{ 3912 struct pt_regs *tregs; 3913 struct task_struct *task = PFM_CTX_TASK(ctx); 3914 int state, is_system; 3915 3916 state = ctx->ctx_state; 3917 is_system = ctx->ctx_fl_system; 3918 3919 /* 3920 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) 3921 */ 3922 if (state == PFM_CTX_UNLOADED) return -EINVAL; 3923 3924 /* 3925 * In system wide and when the context is loaded, access can only happen 3926 * when the caller is running on the CPU being monitored by the session. 3927 * It does not have to be the owner (ctx_task) of the context per se. 
static int
pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct pt_regs *tregs;
	struct task_struct *task = PFM_CTX_TASK(ctx);
	int state, is_system;

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	/*
	 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE)
	 */
	if (state == PFM_CTX_UNLOADED) return -EINVAL;

	/*
	 * In system-wide mode, when the context is loaded, access can only happen
	 * when the caller is running on the CPU being monitored by the session.
	 * It does not have to be the owner (ctx_task) of the context per se.
	 */
	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
		return -EBUSY;
	}
	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
		task_pid_nr(PFM_CTX_TASK(ctx)),
		state,
		is_system));
	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
	if (is_system) {
		/*
		 * Update local PMU first
		 *
		 * disable dcr pp
		 */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
		ia64_srlz_i();

		/*
		 * update local cpuinfo
		 */
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);

		/*
		 * stop monitoring, does srlz.i
		 */
		pfm_clear_psr_pp();

		/*
		 * stop monitoring in the caller
		 */
		ia64_psr(regs)->pp = 0;

		return 0;
	}
	/*
	 * per-task mode
	 */

	if (task == current) {
		/* stop monitoring at kernel level */
		pfm_clear_psr_up();

		/*
		 * stop monitoring at the user level
		 */
		ia64_psr(regs)->up = 0;
	} else {
		tregs = task_pt_regs(task);

		/*
		 * stop monitoring at the user level
		 */
		ia64_psr(tregs)->up = 0;

		/*
		 * monitoring disabled in kernel at next reschedule
		 */
		ctx->ctx_saved_psr_up = 0;
		DPRINT(("task=[%d]\n", task_pid_nr(task)));
	}
	return 0;
}


static int
pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct pt_regs *tregs;
	int state, is_system;

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	if (state != PFM_CTX_LOADED) return -EINVAL;

	/*
	 * In system-wide mode, when the context is loaded, access can only happen
	 * when the caller is running on the CPU being monitored by the session.
	 * It does not have to be the owner (ctx_task) of the context per se.
	 */
	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
		return -EBUSY;
	}

	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
	if (is_system) {

		/*
		 * set user level psr.pp for the caller
		 */
		ia64_psr(regs)->pp = 1;

		/*
		 * now update the local PMU and cpuinfo
		 */
		PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);

		/*
		 * start monitoring at kernel level
		 */
		pfm_set_psr_pp();

		/* enable dcr pp */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
		ia64_srlz_i();

		return 0;
	}

	/*
	 * per-process mode
	 */

	if (ctx->ctx_task == current) {

		/* start monitoring at kernel level */
		pfm_set_psr_up();

		/*
		 * activate monitoring at user level
		 */
		ia64_psr(regs)->up = 1;

	} else {
		tregs = task_pt_regs(ctx->ctx_task);

		/*
		 * start monitoring at the kernel level the next
		 * time the task is scheduled
		 */
		ctx->ctx_saved_psr_up = IA64_PSR_UP;

		/*
		 * activate monitoring at user level
		 */
		ia64_psr(tregs)->up = 1;
	}
	return 0;
}
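
/*
 * Usage sketch from user level (hypothetical caller): with a context fd
 * obtained from PFM_CREATE_CONTEXT and loaded with PFM_LOAD_CONTEXT, the
 * two commands above toggle monitoring around a measurement interval:
 *
 *	perfmonctl(fd, PFM_START, NULL, 0);
 *	// ... workload to measure ...
 *	perfmonctl(fd, PFM_STOP, NULL, 0);
 */
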
static int
pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	pfarg_reg_t *req = (pfarg_reg_t *)arg;
	unsigned int cnum;
	int i;
	int ret = -EINVAL;

	for (i = 0; i < count; i++, req++) {

		cnum = req->reg_num;

		if (!PMC_IS_IMPL(cnum)) goto abort_mission;

		req->reg_value = PMC_DFL_VAL(cnum);

		PFM_REG_RETFLAG_SET(req->reg_flags, 0);

		DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value));
	}
	return 0;

abort_mission:
	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
	return ret;
}

static int
pfm_check_task_exist(pfm_context_t *ctx)
{
	struct task_struct *g, *t;
	int ret = -ESRCH;

	read_lock(&tasklist_lock);

	do_each_thread (g, t) {
		if (t->thread.pfm_context == ctx) {
			ret = 0;
			goto out;
		}
	} while_each_thread (g, t);
out:
	read_unlock(&tasklist_lock);

	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));

	return ret;
}

static int
pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct task_struct *task;
	struct thread_struct *thread;
	struct pfm_context_t *old;
	unsigned long flags;
#ifndef CONFIG_SMP
	struct task_struct *owner_task = NULL;
#endif
	pfarg_load_t *req = (pfarg_load_t *)arg;
	unsigned long *pmcs_source, *pmds_source;
	int the_cpu;
	int ret = 0;
	int state, is_system, set_dbregs = 0;

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;
	/*
	 * can only load from unloaded or terminated state
	 */
	if (state != PFM_CTX_UNLOADED) {
		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
			req->load_pid,
			ctx->ctx_state));
		return -EBUSY;
	}

	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));

	if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
		DPRINT(("cannot use blocking mode on self\n"));
		return -EINVAL;
	}

	ret = pfm_get_task(ctx, req->load_pid, &task);
	if (ret) {
		DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
		return ret;
	}

	ret = -EINVAL;

	/*
	 * system wide is self monitoring only
	 */
	if (is_system && task != current) {
		DPRINT(("system wide is self monitoring only load_pid=%d\n",
			req->load_pid));
		goto error;
	}

	thread = &task->thread;

	ret = 0;
	/*
	 * cannot load a context which is using range restrictions,
	 * into a task that is being debugged.
	 */
	if (ctx->ctx_fl_using_dbreg) {
		if (thread->flags & IA64_THREAD_DBG_VALID) {
			ret = -EBUSY;
			DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
			goto error;
		}
		LOCK_PFS(flags);

		if (is_system) {
			if (pfm_sessions.pfs_ptrace_use_dbregs) {
				DPRINT(("cannot load [%d] dbregs in use\n",
							task_pid_nr(task)));
				ret = -EBUSY;
			} else {
				pfm_sessions.pfs_sys_use_dbregs++;
				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
				set_dbregs = 1;
			}
		}

		UNLOCK_PFS(flags);

		if (ret) goto error;
	}

	/*
	 * SMP system-wide monitoring implies self-monitoring.
	 *
	 * The programming model expects the task to
	 * be pinned on a CPU throughout the session.
	 * Here we take note of the current CPU at the
	 * time the context is loaded. No call from
	 * another CPU will be allowed.
	 *
	 * The pinning via sched_setaffinity()
	 * must be done by the calling task prior
	 * to this call.
	 *
	 * systemwide: keep track of CPU this session is supposed to run on
	 */
	the_cpu = ctx->ctx_cpu = smp_processor_id();

	ret = -EBUSY;
	/*
	 * now reserve the session
	 */
	ret = pfm_reserve_session(current, is_system, the_cpu);
	if (ret) goto error;

	DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
		thread->pfm_context, ctx));

	ret = -EBUSY;
	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
	if (old != NULL) {
		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
		goto error_unres;
	}

	pfm_reset_msgq(ctx);

	ctx->ctx_state = PFM_CTX_LOADED;

	/*
	 * link context to task
	 */
	ctx->ctx_task = task;

	if (is_system) {
		/*
		 * we load as stopped
		 */
		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);

		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
	} else {
		thread->flags |= IA64_THREAD_PM_VALID;
	}

	/*
	 * propagate into thread-state
	 */
	pfm_copy_pmds(task, ctx);
	pfm_copy_pmcs(task, ctx);

	pmcs_source = ctx->th_pmcs;
	pmds_source = ctx->th_pmds;

	/*
	 * always the case for system-wide
	 */
	if (task == current) {

		if (is_system == 0) {

			/* allow user level control */
			ia64_psr(regs)->sp = 0;
			DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task)));

			SET_LAST_CPU(ctx, smp_processor_id());
			INC_ACTIVATION();
			SET_ACTIVATION(ctx);
#ifndef CONFIG_SMP
			/*
			 * push the other task out, if any
			 */
			owner_task = GET_PMU_OWNER();
			if (owner_task) pfm_lazy_save_regs(owner_task);
#endif
		}
		/*
		 * load all PMD from ctx to PMU (as opposed to thread state)
		 * restore all PMC from ctx to PMU
		 */
		pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
		pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);

		ctx->ctx_reload_pmcs[0] = 0UL;
		ctx->ctx_reload_pmds[0] = 0UL;

		/*
		 * guaranteed safe by earlier check against DBG_VALID
		 */
		if (ctx->ctx_fl_using_dbreg) {
			pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
			pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
		}
		/*
		 * set new ownership
		 */
		SET_PMU_OWNER(task, ctx);

		DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task)));
	} else {
		/*
		 * when not current, task MUST be stopped, so this is safe
		 */
		regs = task_pt_regs(task);

		/* force a full reload */
		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
		SET_LAST_CPU(ctx, -1);

		/* initial saved psr (stopped) */
		ctx->ctx_saved_psr_up = 0UL;
		ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
	}

	ret = 0;

error_unres:
	if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
error:
	/*
	 * we must undo the dbregs setting (for system-wide)
	 */
	if (ret && set_dbregs) {
		LOCK_PFS(flags);
		pfm_sessions.pfs_sys_use_dbregs--;
		UNLOCK_PFS(flags);
	}
	/*
	 * release task, there is now a link with the context
	 */
	if (is_system == 0 && task != current) {
		pfm_put_task(task);

		if (ret == 0) {
			ret = pfm_check_task_exist(ctx);
			if (ret) {
				ctx->ctx_state = PFM_CTX_UNLOADED;
				ctx->ctx_task  = NULL;
			}
		}
	}
	return ret;
}
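
/*
 * Usage sketch from user level (hypothetical values): for a system-wide
 * session the caller pins itself to the monitored CPU before loading,
 * as required by the comment in pfm_context_load():
 *
 *	cpu_set_t set;
 *	pfarg_load_t load;
 *
 *	CPU_ZERO(&set);
 *	CPU_SET(1, &set);				// monitor CPU1
 *	sched_setaffinity(0, sizeof(set), &set);
 *
 *	memset(&load, 0, sizeof(load));
 *	load.load_pid = getpid();			// self-monitoring
 *	perfmonctl(fd, PFM_LOAD_CONTEXT, &load, 1);
 */
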
/*
 * in this function, we do not need to increase the use count
 * for the task via get_task_struct(), because we hold the
 * context lock. If the task were to disappear while having
 * a context attached, it would go through pfm_exit_thread()
 * which also grabs the context lock and would therefore be blocked
 * until we are here.
 */
static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);

static int
pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
	struct task_struct *task = PFM_CTX_TASK(ctx);
	struct pt_regs *tregs;
	int prev_state, is_system;
	int ret;

	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1));

	prev_state = ctx->ctx_state;
	is_system  = ctx->ctx_fl_system;

	/*
	 * unload only when necessary
	 */
	if (prev_state == PFM_CTX_UNLOADED) {
		DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
		return 0;
	}

	/*
	 * clear psr and dcr bits
	 */
	ret = pfm_stop(ctx, NULL, 0, regs);
	if (ret) return ret;

	ctx->ctx_state = PFM_CTX_UNLOADED;

	/*
	 * in system mode, we need to update the PMU directly
	 * and the user level state of the caller, which may not
	 * necessarily be the creator of the context.
	 */
	if (is_system) {

		/*
		 * Update cpuinfo
		 *
		 * local PMU is taken care of in pfm_stop()
		 */
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
		PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);

		/*
		 * save PMDs in context
		 * release ownership
		 */
		pfm_flush_pmds(current, ctx);

		/*
		 * at this point we are done with the PMU
		 * so we can unreserve the resource.
		 */
		if (prev_state != PFM_CTX_ZOMBIE)
			pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu);

		/*
		 * disconnect context from task
		 */
		task->thread.pfm_context = NULL;
		/*
		 * disconnect task from context
		 */
		ctx->ctx_task = NULL;

		/*
		 * There is nothing more to cleanup here.
		 */
		return 0;
	}

	/*
	 * per-task mode
	 */
	tregs = task == current ? regs : task_pt_regs(task);

	if (task == current) {
		/*
		 * cancel user level control
		 */
		ia64_psr(regs)->sp = 1;

		DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task)));
	}
	/*
	 * save PMDs to context
	 * release ownership
	 */
	pfm_flush_pmds(task, ctx);

	/*
	 * at this point we are done with the PMU
	 * so we can unreserve the resource.
	 *
	 * when state was ZOMBIE, we have already unreserved.
	 */
	if (prev_state != PFM_CTX_ZOMBIE)
		pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu);

	/*
	 * reset activation counter and psr
	 */
	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
	SET_LAST_CPU(ctx, -1);

	/*
	 * PMU state will not be restored
	 */
	task->thread.flags &= ~IA64_THREAD_PM_VALID;

	/*
	 * break links between context and task
	 */
	task->thread.pfm_context  = NULL;
	ctx->ctx_task             = NULL;

	PFM_SET_WORK_PENDING(task, 0);

	ctx->ctx_fl_trap_reason  = PFM_TRAP_REASON_NONE;
	ctx->ctx_fl_can_restart  = 0;
	ctx->ctx_fl_going_zombie = 0;

	DPRINT(("disconnected [%d] from context\n", task_pid_nr(task)));

	return 0;
}

/*
 * called only from exit_thread(): task == current
 * we come here only if current has a context attached (loaded or masked)
 */
void
pfm_exit_thread(struct task_struct *task)
{
	pfm_context_t *ctx;
	unsigned long flags;
	struct pt_regs *regs = task_pt_regs(task);
	int ret, state;
	int free_ok = 0;

	ctx = PFM_GET_CTX(task);

	PROTECT_CTX(ctx, flags);

	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task)));

	state = ctx->ctx_state;
	switch(state) {
		case PFM_CTX_UNLOADED:
			/*
			 * only comes to this function if pfm_context is not NULL, i.e., cannot
			 * be in unloaded state
			 */
			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task));
			break;
		case PFM_CTX_LOADED:
		case PFM_CTX_MASKED:
			ret = pfm_context_unload(ctx, NULL, 0, regs);
			if (ret) {
				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
			}
			DPRINT(("ctx unloaded for current state was %d\n", state));

			pfm_end_notify_user(ctx);
			break;
		case PFM_CTX_ZOMBIE:
			ret = pfm_context_unload(ctx, NULL, 0, regs);
			if (ret) {
				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
			}
			free_ok = 1;
			break;
		default:
			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state);
			break;
	}
	UNPROTECT_CTX(ctx, flags);

	{ u64 psr = pfm_get_psr();
	  BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
	  BUG_ON(GET_PMU_OWNER());
	  BUG_ON(ia64_psr(regs)->up);
	  BUG_ON(ia64_psr(regs)->pp);
	}

	/*
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
	 */
	if (free_ok) pfm_context_free(ctx);
}

/*
 * functions MUST be listed in the increasing order of their index (see perfmon.h)
 */
#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
#define PFM_CMD_PCLRWS	(PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
#define PFM_CMD_PCLRW	(PFM_CMD_FD|PFM_CMD_ARG_RW)
#define PFM_CMD_NONE	{ NULL, "no-cmd", 0, 0, 0, NULL}

static pfm_cmd_desc_t pfm_cmd_tab[]={
/* 0  */PFM_CMD_NONE,
/* 1  */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 2  */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 3  */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 4  */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
/* 5  */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
/* 6  */PFM_CMD_NONE,
/* 7  */PFM_CMD_NONE,
/* 8  */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
/* 9  */PFM_CMD_NONE,
/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
/* 11 */PFM_CMD_NONE,
/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
/* 14 */PFM_CMD_NONE,
/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
/* 18 */PFM_CMD_NONE,
/* 19 */PFM_CMD_NONE,
/* 20 */PFM_CMD_NONE,
/* 21 */PFM_CMD_NONE,
/* 22 */PFM_CMD_NONE,
/* 23 */PFM_CMD_NONE,
/* 24 */PFM_CMD_NONE,
/* 25 */PFM_CMD_NONE,
/* 26 */PFM_CMD_NONE,
/* 27 */PFM_CMD_NONE,
/* 28 */PFM_CMD_NONE,
/* 29 */PFM_CMD_NONE,
/* 30 */PFM_CMD_NONE,
/* 31 */PFM_CMD_NONE,
/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
};
#define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
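
/*
 * Illustrative mapping (hypothetical values): the table index is the
 * perfmonctl() command number, so entry 1 dispatches PFM_WRITE_PMCS with
 * an array of pfarg_reg_t. PFM_CMD_ARG_MANY entries accept any count > 0;
 * fixed-argument entries require count == cmd_narg:
 *
 *	pfarg_reg_t pc;
 *
 *	memset(&pc, 0, sizeof(pc));
 *	pc.reg_num   = 4;		// hypothetical PMC index
 *	pc.reg_value = 0UL;
 *	perfmonctl(fd, PFM_WRITE_PMCS, &pc, 1);
 */
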
static int
pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
{
	struct task_struct *task;
	int state, old_state;

recheck:
	state = ctx->ctx_state;
	task  = ctx->ctx_task;

	if (task == NULL) {
		DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
		return 0;
	}

	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
		ctx->ctx_fd,
		state,
		task_pid_nr(task),
		task->state, PFM_CMD_STOPPED(cmd)));

	/*
	 * self-monitoring always ok.
	 *
	 * for system-wide the caller can either be the creator of the
	 * context (the one to which the context is attached) or
	 * a task running on the same CPU as the session.
	 */
	if (task == current || ctx->ctx_fl_system) return 0;

	/*
	 * we are monitoring another thread
	 */
	switch(state) {
		case PFM_CTX_UNLOADED:
			/*
			 * if context is UNLOADED we are safe to go
			 */
			return 0;
		case PFM_CTX_ZOMBIE:
			/*
			 * no command can operate on a zombie context
			 */
			DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
			return -EINVAL;
		case PFM_CTX_MASKED:
			/*
			 * PMU state has been saved to software even though
			 * the thread may still be running.
			 */
			if (cmd != PFM_UNLOAD_CONTEXT) return 0;
	}

	/*
	 * context is LOADED or MASKED. Some commands may need to have
	 * the task stopped.
	 *
	 * We could lift this restriction for UP but it would mean that
	 * the user has no guarantee the task would not run between
	 * two successive calls to perfmonctl(). That's probably OK.
	 * If this user wants to ensure the task does not run, then
	 * the task must be stopped.
	 */
	if (PFM_CMD_STOPPED(cmd)) {
		if (!task_is_stopped_or_traced(task)) {
			DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
			return -EBUSY;
		}
		/*
		 * task is now stopped, wait for ctxsw out
		 *
		 * This is an interesting point in the code.
		 * We need to unprotect the context because
		 * the pfm_save_regs() routine needs to grab
		 * the same lock. There is danger in doing
		 * this because it leaves a window open for
		 * another task to get access to the context
		 * and possibly change its state. The one thing
		 * that is not possible is for the context to disappear
		 * because we are protected by the VFS layer, i.e.,
		 * get_fd()/put_fd().
		 */
		old_state = state;

		UNPROTECT_CTX(ctx, flags);

		wait_task_inactive(task, 0);

		PROTECT_CTX(ctx, flags);

		/*
		 * we must recheck to verify if state has changed
		 */
		if (ctx->ctx_state != old_state) {
			DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
			goto recheck;
		}
	}
	return 0;
}
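
/*
 * Sketch of the calling convention checked above (hypothetical caller):
 * to issue a PFM_CMD_STOPPED command on another thread, the controlling
 * process stops the target first, e.g. via ptrace, so that
 * task_is_stopped_or_traced() holds:
 *
 *	ptrace(PTRACE_ATTACH, pid, NULL, NULL);
 *	waitpid(pid, &status, 0);		// target is now stopped
 *	perfmonctl(fd, PFM_WRITE_PMDS, pds, npds);
 */
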

/*
 * system-call entry point (must return long)
 */
asmlinkage long
sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
{
	struct file *file = NULL;
	pfm_context_t *ctx = NULL;
	unsigned long flags = 0UL;
	void *args_k = NULL;
	long ret; /* will expand int return types */
	size_t base_sz, sz, xtra_sz = 0;
	int narg, completed_args = 0, call_made = 0, cmd_flags;
	int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
	int (*getsize)(void *arg, size_t *sz);
#define PFM_MAX_ARGSIZE	4096

	/*
	 * reject any call if perfmon was disabled at initialization
	 */
	if (unlikely(pmu_conf == NULL)) return -ENOSYS;

	if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
		DPRINT(("invalid cmd=%d\n", cmd));
		return -EINVAL;
	}

	func      = pfm_cmd_tab[cmd].cmd_func;
	narg      = pfm_cmd_tab[cmd].cmd_narg;
	base_sz   = pfm_cmd_tab[cmd].cmd_argsize;
	getsize   = pfm_cmd_tab[cmd].cmd_getsize;
	cmd_flags = pfm_cmd_tab[cmd].cmd_flags;

	if (unlikely(func == NULL)) {
		DPRINT(("invalid cmd=%d\n", cmd));
		return -EINVAL;
	}

	DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
		PFM_CMD_NAME(cmd),
		cmd,
		narg,
		base_sz,
		count));

	/*
	 * check if number of arguments matches what the command expects
	 */
	if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
		return -EINVAL;

restart_args:
	sz = xtra_sz + base_sz*count;
	/*
	 * limit abuse to min page size
	 */
	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz);
		return -E2BIG;
	}

	/*
	 * allocate default-sized argument buffer
	 */
	if (likely(count && args_k == NULL)) {
		args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
		if (args_k == NULL) return -ENOMEM;
	}

	ret = -EFAULT;

	/*
	 * copy arguments
	 *
	 * assume sz = 0 for command without parameters
	 */
	if (sz && copy_from_user(args_k, arg, sz)) {
		DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg));
		goto error_args;
	}

	/*
	 * check if command supports extra parameters
	 */
	if (completed_args == 0 && getsize) {
		/*
		 * get extra parameters size (based on main argument)
		 */
		ret = (*getsize)(args_k, &xtra_sz);
		if (ret) goto error_args;

		completed_args = 1;

		DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz));

		/* retry if necessary */
		if (likely(xtra_sz)) goto restart_args;
	}

	if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;

	ret = -EBADF;

	file = fget(fd);
	if (unlikely(file == NULL)) {
		DPRINT(("invalid fd %d\n", fd));
		goto error_args;
	}
	if (unlikely(PFM_IS_FILE(file) == 0)) {
		DPRINT(("fd %d not related to perfmon\n", fd));
		goto error_args;
	}

	ctx = (pfm_context_t *)file->private_data;
	if (unlikely(ctx == NULL)) {
		DPRINT(("no context for fd %d\n", fd));
		goto error_args;
	}
	prefetch(&ctx->ctx_state);

	PROTECT_CTX(ctx, flags);

	/*
	 * check task is stopped
	 */
	ret = pfm_check_task_state(ctx, cmd, flags);
	if (unlikely(ret)) goto abort_locked;

skip_fd:
	ret = (*func)(ctx, args_k, count, task_pt_regs(current));

	call_made = 1;

abort_locked:
	if (likely(ctx)) {
		DPRINT(("context unlocked\n"));
		UNPROTECT_CTX(ctx, flags);
	}

	/* copy argument back to user, if needed */
	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;

error_args:
	if (file)
		fput(file);

	kfree(args_k);

	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));

	return ret;
}
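
/*
 * Note on the two-pass argument copy above: a command with a getsize
 * callback (only PFM_CREATE_CONTEXT in the table) carries a variable-size
 * trailing argument, presumably the sampling-format-specific structure
 * that follows pfarg_context_t. The first copy_from_user() brings in the
 * base argument, (*getsize)() derives xtra_sz from it, and the code jumps
 * back to restart_args to re-copy base plus extra into one buffer.
 */
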
static void
pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
{
	pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
	pfm_ovfl_ctrl_t rst_ctrl;
	int state;
	int ret = 0;

	state = ctx->ctx_state;
	if (CTX_HAS_SMPL(ctx)) {

		rst_ctrl.bits.mask_monitoring = 0;
		rst_ctrl.bits.reset_ovfl_pmds = 0;

		if (state == PFM_CTX_LOADED)
			ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
		else
			ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
	} else {
		rst_ctrl.bits.mask_monitoring = 0;
		rst_ctrl.bits.reset_ovfl_pmds = 1;
	}

	if (ret == 0) {
		if (rst_ctrl.bits.reset_ovfl_pmds) {
			pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
		}
		if (rst_ctrl.bits.mask_monitoring == 0) {
			DPRINT(("resuming monitoring\n"));
			if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current);
		} else {
			DPRINT(("stopping monitoring\n"));
			//pfm_stop_monitoring(current, regs);
		}
		ctx->ctx_state = PFM_CTX_LOADED;
	}
}

/*
 * context MUST BE LOCKED when calling
 * can only be called for current
 */
static void
pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
{
	int ret;

	DPRINT(("entering for [%d]\n", task_pid_nr(current)));

	ret = pfm_context_unload(ctx, NULL, 0, regs);
	if (ret) {
		printk(KERN_ERR "pfm_context_force_terminate: [%d] unload failed with %d\n", task_pid_nr(current), ret);
	}

	/*
	 * and wakeup controlling task, indicating we are now disconnected
	 */
	wake_up_interruptible(&ctx->ctx_zombieq);

	/*
	 * given that context is still locked, the controlling
	 * task will only get access when we return from
	 * pfm_handle_work().
	 */
}

static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);

/*
 * pfm_handle_work() can be called with interrupts enabled
 * (TIF_NEED_RESCHED) or disabled. The down_interruptible
 * call may sleep, therefore we must re-enable interrupts
 * to avoid deadlocks. It is safe to do so because this function
 * is called ONLY when returning to user level (pUStk=1), in which case
 * there is no risk of kernel stack overflow due to deep
 * interrupt nesting.
 */
void
pfm_handle_work(void)
{
	pfm_context_t *ctx;
	struct pt_regs *regs;
	unsigned long flags, dummy_flags;
	unsigned long ovfl_regs;
	unsigned int reason;
	int ret;

	ctx = PFM_GET_CTX(current);
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: [%d] has no PFM context\n",
			task_pid_nr(current));
		return;
	}

	PROTECT_CTX(ctx, flags);

	PFM_SET_WORK_PENDING(current, 0);

	regs = task_pt_regs(current);

	/*
	 * extract reason for being here and clear
	 */
	reason = ctx->ctx_fl_trap_reason;
	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
	ovfl_regs = ctx->ctx_ovfl_regs[0];

	DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state));

	/*
	 * must be done before we check for simple-reset mode
	 */
	if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE)
		goto do_zombie;

	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
	if (reason == PFM_TRAP_REASON_RESET)
		goto skip_blocking;

	/*
	 * restore interrupt mask to what it was on entry.
	 * Could be enabled/disabled.
	 */
	UNPROTECT_CTX(ctx, flags);

	/*
	 * force interrupt enable because of down_interruptible()
	 */
	local_irq_enable();

	DPRINT(("before block sleeping\n"));

	/*
	 * may go through without blocking on SMP systems
	 * if restart has been received already by the time we call down()
	 */
	ret = wait_for_completion_interruptible(&ctx->ctx_restart_done);

	DPRINT(("after block sleeping ret=%d\n", ret));

	/*
	 * lock context and mask interrupts again
	 * We save flags into a dummy because we may have
	 * altered interrupts mask compared to entry in this
	 * function.
	 */
	PROTECT_CTX(ctx, dummy_flags);

	/*
	 * we need to read the ovfl_regs only after wake-up
	 * because we may have had pfm_write_pmds() in between
	 * and that can change PMD values and therefore
	 * ovfl_regs is reset for these new PMD values.
	 */
	ovfl_regs = ctx->ctx_ovfl_regs[0];

	if (ctx->ctx_fl_going_zombie) {
do_zombie:
		DPRINT(("context is zombie, bailing out\n"));
		pfm_context_force_terminate(ctx, regs);
		goto nothing_to_do;
	}
	/*
	 * in case of interruption of down() we don't restart anything
	 */
	if (ret < 0)
		goto nothing_to_do;

skip_blocking:
	pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
	ctx->ctx_ovfl_regs[0] = 0UL;

nothing_to_do:
	/*
	 * restore flags as they were upon entry
	 */
	UNPROTECT_CTX(ctx, flags);
}

static int
pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
{
	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
		DPRINT(("ignoring overflow notification, owner is zombie\n"));
		return 0;
	}

	DPRINT(("waking up somebody\n"));

	if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait);

	/*
	 * safe, we are not in intr handler, nor in ctxsw when
	 * we come here
	 */
	kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);

	return 0;
}

static int
pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
{
	pfm_msg_t *msg = NULL;

	if (ctx->ctx_fl_no_msg == 0) {
		msg = pfm_get_new_msg(ctx);
		if (msg == NULL) {
			printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
			return -1;
		}

		msg->pfm_ovfl_msg.msg_type         = PFM_MSG_OVFL;
		msg->pfm_ovfl_msg.msg_ctx_fd       = ctx->ctx_fd;
		msg->pfm_ovfl_msg.msg_active_set   = 0;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL;
		msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL;
		msg->pfm_ovfl_msg.msg_tstamp       = 0UL;
	}

	DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n",
		msg,
		ctx->ctx_fl_no_msg,
		ctx->ctx_fd,
		ovfl_pmds));

	return pfm_notify_user(ctx, msg);
}

static int
pfm_end_notify_user(pfm_context_t *ctx)
{
	pfm_msg_t *msg;

	msg = pfm_get_new_msg(ctx);
	if (msg == NULL) {
		printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
		return -1;
	}
	/* no leak */
	memset(msg, 0, sizeof(*msg));

	msg->pfm_end_msg.msg_type    = PFM_MSG_END;
	msg->pfm_end_msg.msg_ctx_fd  = ctx->ctx_fd;
	msg->pfm_ovfl_msg.msg_tstamp = 0UL;

	DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n",
		msg,
		ctx->ctx_fl_no_msg,
		ctx->ctx_fd));

	return pfm_notify_user(ctx, msg);
}
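
/*
 * Consumer-side sketch (user level, hypothetical handler): the messages
 * queued above are delivered through the context file descriptor. A
 * monitoring tool typically waits with poll()/select() or SIGIO and then
 * reads one pfm_msg_t at a time:
 *
 *	pfm_msg_t msg;
 *
 *	if (read(fd, &msg, sizeof(msg)) == sizeof(msg)) {
 *		if (msg.pfm_ovfl_msg.msg_type == PFM_MSG_OVFL)
 *			process_ovfl(msg.pfm_ovfl_msg.msg_ovfl_pmds[0]);
 *	}
 */
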

/*
 * main overflow processing routine.
 * it can be called from the interrupt path or explicitly during the context switch code
 */
static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx,
				unsigned long pmc0, struct pt_regs *regs)
{
	pfm_ovfl_arg_t *ovfl_arg;
	unsigned long mask;
	unsigned long old_val, ovfl_val, new_val;
	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
	unsigned long tstamp;
	pfm_ovfl_ctrl_t ovfl_ctrl;
	unsigned int i, has_smpl;
	int must_notify = 0;

	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;

	/*
	 * sanity test. Should never happen
	 */
	if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;

	tstamp   = ia64_get_itc();
	mask     = pmc0 >> PMU_FIRST_COUNTER;
	ovfl_val = pmu_conf->ovfl_val;
	has_smpl = CTX_HAS_SMPL(ctx);

	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
		     "used_pmds=0x%lx\n",
		     pmc0,
		     task ? task_pid_nr(task): -1,
		     (regs ? regs->cr_iip : 0),
		     CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
		     ctx->ctx_used_pmds[0]));


	/*
	 * first we update the virtual counters
	 * assume there was a prior ia64_srlz_d() issued
	 */
	for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {

		/* skip pmd which did not overflow */
		if ((mask & 0x1) == 0) continue;

		/*
		 * Note that the pmd is not necessarily 0 at this point as qualified events
		 * may have happened before the PMU was frozen. The residual count is not
		 * taken into consideration here but will be with any read of the pmd via
		 * pfm_read_pmds().
		 */
		old_val              = new_val = ctx->ctx_pmds[i].val;
		new_val             += 1 + ovfl_val;
		ctx->ctx_pmds[i].val = new_val;

		/*
		 * check for overflow condition
		 */
		if (likely(old_val > new_val)) {
			ovfl_pmds |= 1UL << i;
			if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
		}

		DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
			i,
			new_val,
			old_val,
			ia64_get_pmd(i) & ovfl_val,
			ovfl_pmds,
			ovfl_notify));
	}

	/*
	 * there was no 64-bit overflow, nothing else to do
	 */
	if (ovfl_pmds == 0UL) return;

	/*
	 * reset all control bits
	 */
	ovfl_ctrl.val = 0;
	reset_pmds    = 0UL;

	/*
	 * if a sampling format module exists, then we "cache" the overflow by
	 * calling the module's handler() routine.
	 */
	if (has_smpl) {
		unsigned long start_cycles, end_cycles;
		unsigned long pmd_mask;
		int j, k, ret = 0;
		int this_cpu = smp_processor_id();

		pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
		ovfl_arg = &ctx->ctx_ovfl_arg;

		prefetch(ctx->ctx_smpl_hdr);

		for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {

			mask = 1UL << i;

			if ((pmd_mask & 0x1) == 0) continue;

			ovfl_arg->ovfl_pmd      = (unsigned char )i;
			ovfl_arg->ovfl_notify   = ovfl_notify & mask ? 1 : 0;
			ovfl_arg->active_set    = 0;
			ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
			ovfl_arg->smpl_pmds[0]  = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];

			ovfl_arg->pmd_value      = ctx->ctx_pmds[i].val;
			ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
			ovfl_arg->pmd_eventid    = ctx->ctx_pmds[i].eventid;

			/*
			 * copy values of pmds of interest. Sampling format may copy them
			 * into sampling buffer.
			 */
			if (smpl_pmds) {
				for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
					if ((smpl_pmds & 0x1) == 0) continue;
					ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
					DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
				}
			}

			pfm_stats[this_cpu].pfm_smpl_handler_calls++;

			start_cycles = ia64_get_itc();

			/*
			 * call custom buffer format record (handler) routine
			 */
			ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);

			end_cycles = ia64_get_itc();

			/*
			 * For those controls, we take the union because they have
			 * an all or nothing behavior.
			 */
			ovfl_ctrl.bits.notify_user     |= ovfl_arg->ovfl_ctrl.bits.notify_user;
			ovfl_ctrl.bits.block_task      |= ovfl_arg->ovfl_ctrl.bits.block_task;
			ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
			/*
			 * build the bitmask of pmds to reset now
			 */
			if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;

			pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
		}
		/*
		 * when the module cannot handle the rest of the overflows, we abort right here
		 */
		if (ret && pmd_mask) {
			DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
				pmd_mask<<PMU_FIRST_COUNTER));
		}
		/*
		 * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
		 */
		ovfl_pmds &= ~reset_pmds;
	} else {
		/*
		 * when no sampling module is used, then the default
		 * is to notify on overflow if requested by user
		 */
		ovfl_ctrl.bits.notify_user     = ovfl_notify ? 1 : 0;
		ovfl_ctrl.bits.block_task      = ovfl_notify ? 1 : 0;
		ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0;
		ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
		/*
		 * if needed, we reset all overflowed pmds
		 */
		if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
	}

	DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds));

	/*
	 * reset the requested PMD registers using the short reset values
	 */
	if (reset_pmds) {
		unsigned long bm = reset_pmds;
		pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
	}

	if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
		/*
		 * keep track of what to reset when unblocking
		 */
		ctx->ctx_ovfl_regs[0] = ovfl_pmds;

		/*
		 * check for blocking context
		 */
		if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {

			ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;

			/*
			 * set the perfmon specific checking pending work for the task
			 */
			PFM_SET_WORK_PENDING(task, 1);

			/*
			 * when coming from ctxsw, current still points to the
			 * previous task, therefore we must work with task and not current.
			 */
			set_notify_resume(task);
		}
		/*
		 * defer until state is changed (shorten spin window). the context is locked
		 * anyway, so the signal receiver would come spin for nothing.
		 */
		must_notify = 1;
	}

	DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
			GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1,
			PFM_GET_WORK_PENDING(task),
			ctx->ctx_fl_trap_reason,
			ovfl_pmds,
			ovfl_notify,
			ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
	/*
	 * in case monitoring must be stopped, we toggle the psr bits
	 */
	if (ovfl_ctrl.bits.mask_monitoring) {
		pfm_mask_monitoring(task);
		ctx->ctx_state = PFM_CTX_MASKED;
		ctx->ctx_fl_can_restart = 1;
	}

	/*
	 * send notification now
	 */
	if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);

	return;

sanity_check:
	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
			smp_processor_id(),
			task ? task_pid_nr(task) : -1,
			pmc0);
	return;

stop_monitoring:
	/*
	 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
	 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
	 * come here as zombie only if the task is the current task. In which case, we
	 * can access the PMU hardware directly.
	 *
	 * Note that zombies do have PM_VALID set. So here we do the minimal.
	 *
	 * In case the context was zombified it could not be reclaimed at the time
	 * the monitoring program exited. At this point, the PMU reservation has been
	 * returned, the sampling buffer has been freed. We must convert this call
	 * into a spurious interrupt. However, we must also avoid infinite overflows
	 * by stopping monitoring for this task. We can only come here for a per-task
	 * context. All we need to do is to stop monitoring using the psr bits which
	 * are always task private. By re-enabling secure monitoring, we ensure that
	 * the monitored task will not be able to re-activate monitoring.
	 * The task will eventually be context switched out, at which point the context
	 * will be reclaimed (that includes releasing ownership of the PMU).
	 *
	 * So there might be a window of time where the number of per-task sessions is zero
	 * yet one PMU might have an owner and get at most one overflow interrupt for a zombie
	 * context. This is safe because if a per-task session comes in, it will push this one
	 * out and by virtue of pfm_save_regs(), this one will disappear. If a system-wide
	 * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will
	 * also push our zombie context out.
	 *
	 * Overall pretty hairy stuff....
	 */
	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1));
	pfm_clear_psr_up();
	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;
	return;
}

static int
pfm_do_interrupt_handler(void *arg, struct pt_regs *regs)
{
	struct task_struct *task;
	pfm_context_t *ctx;
	unsigned long flags;
	u64 pmc0;
	int this_cpu = smp_processor_id();
	int retval = 0;

	pfm_stats[this_cpu].pfm_ovfl_intr_count++;

	/*
	 * srlz.d done before arriving here
	 */
	pmc0 = ia64_get_pmc(0);

	task = GET_PMU_OWNER();
	ctx  = GET_PMU_CTX();

	/*
	 * if we have some pending bits set
	 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
	 */
	if (PMC0_HAS_OVFL(pmc0) && task) {
		/*
		 * we assume that pmc0.fr is always set here
		 */

		/* sanity check */
		if (!ctx) goto report_spurious1;

		if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0)
			goto report_spurious2;

		PROTECT_CTX_NOPRINT(ctx, flags);

		pfm_overflow_handler(task, ctx, pmc0, regs);

		UNPROTECT_CTX_NOPRINT(ctx, flags);

	} else {
		pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++;
		retval = -1;
	}
	/*
	 * keep it unfrozen at all times
	 */
	pfm_unfreeze_pmu();

	return retval;

report_spurious1:
	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
		this_cpu, task_pid_nr(task));
	pfm_unfreeze_pmu();
	return -1;
report_spurious2:
	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n",
		this_cpu,
		task_pid_nr(task));
	pfm_unfreeze_pmu();
	return -1;
}

static irqreturn_t
pfm_interrupt_handler(int irq, void *arg)
{
	unsigned long start_cycles, total_cycles;
	unsigned long min, max;
	int this_cpu;
	int ret;
	struct pt_regs *regs = get_irq_regs();

	this_cpu = get_cpu();
	if (likely(!pfm_alt_intr_handler)) {
		min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
		max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;

		start_cycles = ia64_get_itc();

		ret = pfm_do_interrupt_handler(arg, regs);

		total_cycles = ia64_get_itc();

		/*
		 * don't measure spurious interrupts
		 */
		if (likely(ret == 0)) {
			total_cycles -= start_cycles;

			if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
			if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;

			pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
		}
	}
	else {
		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
	}

	put_cpu();
	return IRQ_HANDLED;
}


/*
 * /proc/perfmon interface, for debug only
 */

#define PFM_PROC_SHOW_HEADER	((void *)(long)nr_cpu_ids+1)

static void *
pfm_proc_start(struct seq_file *m, loff_t *pos)
{
	if (*pos == 0) {
		return PFM_PROC_SHOW_HEADER;
	}

	while (*pos <= nr_cpu_ids) {
		if (cpu_online(*pos - 1)) {
			return (void *)*pos;
		}
		++*pos;
	}
	return NULL;
}

static void *
pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	++*pos;
	return pfm_proc_start(m, pos);
}

static void
pfm_proc_stop(struct seq_file *m, void *v)
{
}

static void
pfm_proc_show_header(struct seq_file *m)
{
	struct list_head * pos;
	pfm_buffer_fmt_t * entry;
	unsigned long flags;

	seq_printf(m,
		"perfmon version           : %u.%u\n"
		"model                     : %s\n"
		"fastctxsw                 : %s\n"
		"expert mode               : %s\n"
		"ovfl_mask                 : 0x%lx\n"
		"PMU flags                 : 0x%x\n",
		PFM_VERSION_MAJ, PFM_VERSION_MIN,
		pmu_conf->pmu_name,
		pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
		pfm_sysctl.expert_mode > 0 ? "Yes": "No",
		pmu_conf->ovfl_val,
		pmu_conf->flags);

	LOCK_PFS(flags);

	seq_printf(m,
		"proc_sessions             : %u\n"
		"sys_sessions              : %u\n"
		"sys_use_dbregs            : %u\n"
		"ptrace_use_dbregs         : %u\n",
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		pfm_sessions.pfs_ptrace_use_dbregs);

	UNLOCK_PFS(flags);

	spin_lock(&pfm_buffer_fmt_lock);

	list_for_each(pos, &pfm_buffer_fmt_list) {
		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
		seq_printf(m, "format                    : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
			entry->fmt_uuid[0],
			entry->fmt_uuid[1],
			entry->fmt_uuid[2],
			entry->fmt_uuid[3],
			entry->fmt_uuid[4],
			entry->fmt_uuid[5],
			entry->fmt_uuid[6],
			entry->fmt_uuid[7],
			entry->fmt_uuid[8],
			entry->fmt_uuid[9],
			entry->fmt_uuid[10],
			entry->fmt_uuid[11],
			entry->fmt_uuid[12],
			entry->fmt_uuid[13],
			entry->fmt_uuid[14],
			entry->fmt_uuid[15],
			entry->fmt_name);
	}
	spin_unlock(&pfm_buffer_fmt_lock);

}

static int
pfm_proc_show(struct seq_file *m, void *v)
{
	unsigned long psr;
	unsigned int i;
	int cpu;

	if (v == PFM_PROC_SHOW_HEADER) {
		pfm_proc_show_header(m);
		return 0;
	}

	/* show info for CPU (v - 1) */

	cpu = (long)v - 1;
	seq_printf(m,
		"CPU%-2d overflow intrs      : %lu\n"
		"CPU%-2d overflow cycles     : %lu\n"
		"CPU%-2d overflow min        : %lu\n"
		"CPU%-2d overflow max        : %lu\n"
		"CPU%-2d smpl handler calls  : %lu\n"
		"CPU%-2d smpl handler cycles : %lu\n"
		"CPU%-2d spurious intrs      : %lu\n"
		"CPU%-2d replay intrs        : %lu\n"
		"CPU%-2d syst_wide           : %d\n"
		"CPU%-2d dcr_pp              : %d\n"
		"CPU%-2d exclude idle        : %d\n"
		"CPU%-2d owner               : %d\n"
		"CPU%-2d context             : %p\n"
		"CPU%-2d activations         : %lu\n",
		cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles,
		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min,
		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max,
		cpu, pfm_stats[cpu].pfm_smpl_handler_calls,
		cpu, pfm_stats[cpu].pfm_smpl_handler_cycles,
		cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
		cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count,
		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0,
		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
		cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1,
		cpu, pfm_get_cpu_data(pmu_ctx, cpu),
		cpu, pfm_get_cpu_data(pmu_activation_number, cpu));

	if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) {

		psr = pfm_get_psr();

		ia64_srlz_d();

		seq_printf(m,
			"CPU%-2d psr                 : 0x%lx\n"
			"CPU%-2d pmc0                : 0x%lx\n",
			cpu, psr,
			cpu, ia64_get_pmc(0));

		for (i=0; PMC_IS_LAST(i) == 0; i++) {
			if (PMC_IS_COUNTING(i) == 0) continue;
			seq_printf(m,
				"CPU%-2d pmc%u                : 0x%lx\n"
				"CPU%-2d pmd%u                : 0x%lx\n",
				cpu, i, ia64_get_pmc(i),
				cpu, i, ia64_get_pmd(i));
		}
	}
	return 0;
}

const struct seq_operations pfm_seq_ops = {
	.start = pfm_proc_start,
	.next  = pfm_proc_next,
	.stop  = pfm_proc_stop,
	.show  = pfm_proc_show
};

static int
pfm_proc_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pfm_seq_ops);
}


/*
 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
 * is active or inactive based on mode. We must rely on the value in
 * local_cpu_data->pfm_syst_info
 */
void
pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
{
	struct pt_regs *regs;
	unsigned long dcr;
	unsigned long dcr_pp;

	dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;

	/*
	 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
	 * on every CPU, so we can rely on the pid to identify the idle task.
	 */
	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
		regs = task_pt_regs(task);
		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
		return;
	}
	/*
	 * if monitoring has started
	 */
	if (dcr_pp) {
		dcr = ia64_getreg(_IA64_REG_CR_DCR);
		/*
		 * context switching in?
		 */
		if (is_ctxswin) {
			/* mask monitoring for the idle task */
			ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
			pfm_clear_psr_pp();
			ia64_srlz_i();
			return;
		}
		/*
		 * context switching out
		 * restore monitoring for next task
		 *
		 * Due to inlining this odd if-then-else construction generates
		 * better code.
		 */
		ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
		pfm_set_psr_pp();
		ia64_srlz_i();
	}
}

#ifdef CONFIG_SMP

static void
pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
{
	struct task_struct *task = ctx->ctx_task;

	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->sp = 1;

	if (GET_PMU_OWNER() == task) {
		DPRINT(("cleared ownership for [%d]\n",
					task_pid_nr(ctx->ctx_task)));
		SET_PMU_OWNER(NULL, NULL);
	}

	/*
	 * disconnect the task from the context and vice-versa
	 */
	PFM_SET_WORK_PENDING(task, 0);

	task->thread.pfm_context  = NULL;
	task->thread.flags       &= ~IA64_THREAD_PM_VALID;

	DPRINT(("force cleanup for [%d]\n", task_pid_nr(task)));
}


/*
 * in 2.6, interrupts are masked when we come here and the runqueue lock is held
 */
void
pfm_save_regs(struct task_struct *task)
{
	pfm_context_t *ctx;
	unsigned long flags;
	u64 psr;


	ctx = PFM_GET_CTX(task);
	if (ctx == NULL) return;

	/*
	 * we always come here with interrupts ALREADY disabled by
	 * the scheduler. So we simply need to protect against concurrent
	 * access, not CPU concurrency.
	 */
	flags = pfm_protect_ctx_ctxsw(ctx);

	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
		struct pt_regs *regs = task_pt_regs(task);

		pfm_clear_psr_up();

		pfm_force_cleanup(ctx, regs);

		BUG_ON(ctx->ctx_smpl_hdr);

		pfm_unprotect_ctx_ctxsw(ctx, flags);

		pfm_context_free(ctx);
		return;
	}

	/*
	 * save current PSR: needed because we modify it
	 */
	ia64_srlz_d();
	psr = pfm_get_psr();

	BUG_ON(psr & (IA64_PSR_I));

	/*
	 * stop monitoring:
	 * This is the last instruction which may generate an overflow
	 *
	 * We do not need to set psr.sp because it is irrelevant in kernel.
	 * It will be restored from ipsr when going back to user level
	 */
	pfm_clear_psr_up();

	/*
	 * keep a copy of psr.up (for reload)
	 */
	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;

	/*
	 * release ownership of this PMU.
	 * PM interrupts are masked, so nothing
	 * can happen.
	 */
	SET_PMU_OWNER(NULL, NULL);

	/*
	 * we systematically save the PMD as we have no
	 * guarantee we will be scheduled on that same
	 * CPU again.
	 */
	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);

	/*
	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
	 * we will need it on the restore path to check
	 * for pending overflow.
	 */
	ctx->th_pmcs[0] = ia64_get_pmc(0);

	/*
	 * unfreeze PMU if it had pending overflows
	 */
	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();

	/*
	 * finally, allow context access.
	 * interrupts will still be masked after this call.
	 */
	pfm_unprotect_ctx_ctxsw(ctx, flags);
}

#else /* !CONFIG_SMP */
void
pfm_save_regs(struct task_struct *task)
{
	pfm_context_t *ctx;
	u64 psr;

	ctx = PFM_GET_CTX(task);
	if (ctx == NULL) return;

	/*
	 * save current PSR: needed because we modify it
	 */
	psr = pfm_get_psr();

	BUG_ON(psr & (IA64_PSR_I));

	/*
	 * stop monitoring:
	 * This is the last instruction which may generate an overflow
	 *
	 * We do not need to set psr.sp because it is irrelevant in kernel.
	 * It will be restored from ipsr when going back to user level
	 */
	pfm_clear_psr_up();

	/*
	 * keep a copy of psr.up (for reload)
	 */
	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
}

static void
pfm_lazy_save_regs (struct task_struct *task)
{
	pfm_context_t *ctx;
	unsigned long flags;

	{ u64 psr = pfm_get_psr();
	  BUG_ON(psr & IA64_PSR_UP);
	}

	ctx = PFM_GET_CTX(task);

	PROTECT_CTX(ctx,flags);

	/*
	 * release ownership of this PMU.
	 * must be done before we save the registers.
	 *
	 * after this call any PMU interrupt is treated
	 * as spurious.
	 */
	SET_PMU_OWNER(NULL, NULL);

	/*
	 * save all the pmds we use
	 */
	pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]);

	/*
	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
	 * it is needed to check for pending overflow
	 * on the restore path
	 */
	ctx->th_pmcs[0] = ia64_get_pmc(0);

	/*
	 * unfreeze PMU if it had pending overflows
	 */
	if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();

	/*
	 * now we can unmask PMU interrupts, they will
	 * be treated as purely spurious and we will not
	 * lose any information
	 */
	UNPROTECT_CTX(ctx,flags);
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_SMP
/*
 * in 2.6, interrupts are masked when we come here and the runqueue lock is held
 */
void
pfm_load_regs (struct task_struct *task)
{
	pfm_context_t *ctx;
	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
	unsigned long flags;
	u64 psr, psr_up;
	int need_irq_resend;

	ctx = PFM_GET_CTX(task);
	if (unlikely(ctx == NULL)) return;

	BUG_ON(GET_PMU_OWNER());

	/*
	 * possible on unload
	 */
	if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return;

	/*
	 * we always come here with interrupts ALREADY disabled by
	 * the scheduler. So we simply need to protect against concurrent
	 * access, not CPU concurrency.
	 */
	flags = pfm_protect_ctx_ctxsw(ctx);
	psr   = pfm_get_psr();

	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;

	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
	BUG_ON(psr & IA64_PSR_I);

	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
		struct pt_regs *regs = task_pt_regs(task);

		BUG_ON(ctx->ctx_smpl_hdr);

		pfm_force_cleanup(ctx, regs);

		pfm_unprotect_ctx_ctxsw(ctx, flags);

		/*
		 * this one (kmalloc'ed) is fine with interrupts disabled
		 */
		pfm_context_free(ctx);

		return;
	}

	/*
	 * we restore ALL the debug registers to avoid picking up
	 * stale state.
	 */
	if (ctx->ctx_fl_using_dbreg) {
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
	}
	/*
	 * retrieve saved psr.up
	 */
	psr_up = ctx->ctx_saved_psr_up;

	/*
	 * if we were the last user of the PMU on that CPU,
	 * then nothing to do except restore psr
	 */
	if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {

		/*
		 * retrieve partial reload masks (due to user modifications)
		 */
		pmc_mask = ctx->ctx_reload_pmcs[0];
		pmd_mask = ctx->ctx_reload_pmds[0];

	} else {
		/*
		 * To avoid leaking information to the user level when psr.sp=0,
		 * we must reload ALL implemented pmds (even the ones we don't use).
		 * In the kernel we only allow PFM_READ_PMDS on registers which
		 * we initialized or requested (sampling) so there is no risk there.
		 */
		pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];

		/*
		 * ALL accessible PMCs are systematically reloaded, unused registers
		 * get their default (from pfm_reset_pmu_state()) values to avoid picking
		 * up stale configuration.
		 *
		 * PMC0 is never in the mask. It is always restored separately.
#else /* !CONFIG_SMP */
/*
 * reload PMU state for UP kernels
 * in 2.6 we come here with interrupts disabled
 */
void
pfm_load_regs (struct task_struct *task)
{
	pfm_context_t *ctx;
	struct task_struct *owner;
	unsigned long pmd_mask, pmc_mask;
	u64 psr, psr_up;
	int need_irq_resend;

	owner = GET_PMU_OWNER();
	ctx = PFM_GET_CTX(task);
	psr = pfm_get_psr();

	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
	BUG_ON(psr & IA64_PSR_I);

	/*
	 * we restore ALL the debug registers to avoid picking up
	 * stale state.
	 *
	 * This must be done even when the task is still the owner
	 * as the registers may have been modified via ptrace()
	 * (not perfmon) by the previous task.
	 */
	if (ctx->ctx_fl_using_dbreg) {
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
	}

	/*
	 * retrieve saved psr.up
	 */
	psr_up = ctx->ctx_saved_psr_up;
	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;

	/*
	 * short path: our state is still there, we just
	 * need to restore psr and we go.
	 *
	 * we do not touch either PMC nor PMD. the psr is not touched
	 * by the overflow_handler. So we are safe w.r.t. interrupt
	 * concurrency even without interrupt masking.
	 */
	if (likely(owner == task)) {
		if (likely(psr_up)) pfm_set_psr_up();
		return;
	}

	/*
	 * someone else is still using the PMU, first push it out and
	 * then we'll be able to install our stuff!
	 *
	 * Upon return, there will be no owner for the current PMU
	 */
	if (owner) pfm_lazy_save_regs(owner);

	/*
	 * To avoid leaking information to the user level when psr.sp=0,
	 * we must reload ALL implemented pmds (even the ones we don't use).
	 * In the kernel we only allow PFM_READ_PMDS on registers which
	 * we initialized or requested (sampling) so there is no risk there.
	 */
	pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];

	/*
	 * ALL accessible PMCs are systematically reloaded, unused registers
	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
	 * up stale configuration.
	 *
	 * PMC0 is never in the mask. It is always restored separately.
	 */
	pmc_mask = ctx->ctx_all_pmcs[0];

	pfm_restore_pmds(ctx->th_pmds, pmd_mask);
	pfm_restore_pmcs(ctx->th_pmcs, pmc_mask);

	/*
	 * check for pending overflow at the time the state
	 * was saved.
	 */
	if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) {
		/*
		 * reload pmc0 with the overflow information.
		 * On McKinley PMU, this will trigger a PMU interrupt
		 */
		ia64_set_pmc(0, ctx->th_pmcs[0]);
		ia64_srlz_d();

		ctx->th_pmcs[0] = 0UL;

		/*
		 * will replay the PMU interrupt
		 */
		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);

		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
	}

	/*
	 * establish new ownership.
	 */
	SET_PMU_OWNER(task, ctx);

	/*
	 * restore the psr.up bit. measurement
	 * is active again.
	 * no PMU interrupt can happen at this point
	 * because we still have interrupts disabled.
	 */
	if (likely(psr_up)) pfm_set_psr_up();
}
#endif /* CONFIG_SMP */
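/*
 * Illustrative note (compiled out, hypothetical register layout): the
 * pmd_mask selection in both load paths trades reload cost against
 * leaking stale counts through psr.sp=0. Say PMD4-PMD7 are implemented
 * (ctx_all_pmds[0] = 0xf0) but the session only programmed PMD4
 * (ctx_used_pmds[0] = 0x10):
 *
 *   fastctxsw=0: pmd_mask = 0xf0, all four PMDs reloaded; nothing
 *                stale can be observed from user level
 *   fastctxsw=1: pmd_mask = 0x10, only PMD4 reloaded; cheaper, but
 *                PMD5-PMD7 may still hold another context's counts
 */
#if 0
	pmd_mask = pfm_sysctl.fastctxsw ? 0x10 /* used only */
					: 0xf0 /* all implemented */;
#endif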
/*
 * this function assumes monitoring is stopped
 */
static void
pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
{
	u64 pmc0;
	unsigned long mask2, val, pmd_val, ovfl_val;
	int i, can_access_pmu = 0;
	int is_self;

	/*
	 * is the caller the task being monitored (or which initiated the
	 * session for system wide measurements)?
	 */
	is_self = ctx->ctx_task == task ? 1 : 0;

	/*
	 * can access PMU if task is the owner of the PMU state on the current CPU
	 * or if we are running on the CPU bound to the context in system-wide mode
	 * (that is not necessarily the task the context is attached to in this mode).
	 * In system-wide we always have can_access_pmu true because a task running on an
	 * invalid processor is flagged earlier in the call stack (see pfm_stop).
	 */
	can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id());
	if (can_access_pmu) {
		/*
		 * Mark the PMU as not owned.
		 * This will cause the interrupt handler to do nothing in case an overflow
		 * interrupt was in-flight.
		 * This also guarantees that pmc0 will contain the final state.
		 * It virtually gives us full control over overflow processing from that point
		 * on.
		 */
		SET_PMU_OWNER(NULL, NULL);
		DPRINT(("releasing ownership\n"));

		/*
		 * read current overflow status:
		 *
		 * we are guaranteed to read the final stable state
		 */
		ia64_srlz_d();
		pmc0 = ia64_get_pmc(0); /* slow */

		/*
		 * reset freeze bit, overflow status information destroyed
		 */
		pfm_unfreeze_pmu();
	} else {
		pmc0 = ctx->th_pmcs[0];
		/*
		 * clear whatever overflow status bits there were
		 */
		ctx->th_pmcs[0] = 0;
	}
	ovfl_val = pmu_conf->ovfl_val;
	mask2 = ctx->ctx_used_pmds[0];

	DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2));

	for (i = 0; mask2; i++, mask2>>=1) {

		/* skip unused pmds */
		if ((mask2 & 0x1) == 0) continue;

		/*
		 * can_access_pmu is always true in system wide mode
		 */
		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i];

		if (PMD_IS_COUNTING(i)) {
			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
				task_pid_nr(task),
				i,
				ctx->ctx_pmds[i].val,
				val & ovfl_val));

			/*
			 * we rebuild the full 64 bit value of the counter
			 */
			val = ctx->ctx_pmds[i].val + (val & ovfl_val);

			/*
			 * now everything is in ctx_pmds[] and we need
			 * to clear the saved context from save_regs() such that
			 * pfm_read_pmds() gets the correct value
			 */
			pmd_val = 0UL;

			/*
			 * take care of overflow inline
			 */
			if (pmc0 & (1UL << i)) {
				val += 1 + ovfl_val;
				DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i));
			}
		}

		DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val));

		if (is_self) ctx->th_pmds[i] = pmd_val;

		ctx->ctx_pmds[i].val = val;
	}
}
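/*
 * Worked example (compiled out, hypothetical numbers): assume 47-bit
 * hardware counters, i.e. ovfl_val = (1UL<<47)-1. If the software copy
 * already accounts for two wraps, the hardware PMD reads 5, and the
 * overflow bit for this counter is set in pmc0, the loop above computes:
 */
#if 0
	u64 ovfl_val = (1UL << 47) - 1;    /* mask of the 47 hw bits */
	u64 soft     = 2 * (ovfl_val + 1); /* two wraps already folded in */
	u64 hw       = 5;                  /* current hardware count */

	u64 val = soft + (hw & ovfl_val);  /* rebuild: 2*2^47 + 5 */
	val += 1 + ovfl_val;               /* pending overflow: + 2^47 */
	/* final 64-bit value: 3*2^47 + 5 */
#endif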
6254 */ 6255 SET_PMU_OWNER(NULL, NULL); 6256 DPRINT(("releasing ownership\n")); 6257 6258 /* 6259 * read current overflow status: 6260 * 6261 * we are guaranteed to read the final stable state 6262 */ 6263 ia64_srlz_d(); 6264 pmc0 = ia64_get_pmc(0); /* slow */ 6265 6266 /* 6267 * reset freeze bit, overflow status information destroyed 6268 */ 6269 pfm_unfreeze_pmu(); 6270 } else { 6271 pmc0 = ctx->th_pmcs[0]; 6272 /* 6273 * clear whatever overflow status bits there were 6274 */ 6275 ctx->th_pmcs[0] = 0; 6276 } 6277 ovfl_val = pmu_conf->ovfl_val; 6278 mask2 = ctx->ctx_used_pmds[0]; 6279 6280 DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); 6281 6282 for (i = 0; mask2; i++, mask2>>=1) { 6283 6284 /* skip non used pmds */ 6285 if ((mask2 & 0x1) == 0) continue; 6286 6287 /* 6288 * can access PMU always true in system wide mode 6289 */ 6290 val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i]; 6291 6292 if (PMD_IS_COUNTING(i)) { 6293 DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", 6294 task_pid_nr(task), 6295 i, 6296 ctx->ctx_pmds[i].val, 6297 val & ovfl_val)); 6298 6299 /* 6300 * we rebuild the full 64 bit value of the counter 6301 */ 6302 val = ctx->ctx_pmds[i].val + (val & ovfl_val); 6303 6304 /* 6305 * now everything is in ctx_pmds[] and we need 6306 * to clear the saved context from save_regs() such that 6307 * pfm_read_pmds() gets the correct value 6308 */ 6309 pmd_val = 0UL; 6310 6311 /* 6312 * take care of overflow inline 6313 */ 6314 if (pmc0 & (1UL << i)) { 6315 val += 1 + ovfl_val; 6316 DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i)); 6317 } 6318 } 6319 6320 DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val)); 6321 6322 if (is_self) ctx->th_pmds[i] = pmd_val; 6323 6324 ctx->ctx_pmds[i].val = val; 6325 } 6326} 6327 6328static struct irqaction perfmon_irqaction = { 6329 .handler = pfm_interrupt_handler, 6330 .flags = IRQF_DISABLED, 6331 .name = "perfmon" 6332}; 6333 6334static void 6335pfm_alt_save_pmu_state(void *data) 6336{ 6337 struct pt_regs *regs; 6338 6339 regs = task_pt_regs(current); 6340 6341 DPRINT(("called\n")); 6342 6343 /* 6344 * should not be necessary but 6345 * let's take not risk 6346 */ 6347 pfm_clear_psr_up(); 6348 pfm_clear_psr_pp(); 6349 ia64_psr(regs)->pp = 0; 6350 6351 /* 6352 * This call is required 6353 * May cause a spurious interrupt on some processors 6354 */ 6355 pfm_freeze_pmu(); 6356 6357 ia64_srlz_d(); 6358} 6359 6360void 6361pfm_alt_restore_pmu_state(void *data) 6362{ 6363 struct pt_regs *regs; 6364 6365 regs = task_pt_regs(current); 6366 6367 DPRINT(("called\n")); 6368 6369 /* 6370 * put PMU back in state expected 6371 * by perfmon 6372 */ 6373 pfm_clear_psr_up(); 6374 pfm_clear_psr_pp(); 6375 ia64_psr(regs)->pp = 0; 6376 6377 /* 6378 * perfmon runs with PMU unfrozen at all times 6379 */ 6380 pfm_unfreeze_pmu(); 6381 6382 ia64_srlz_d(); 6383} 6384 6385int 6386pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) 6387{ 6388 int ret, i; 6389 int reserve_cpu; 6390 6391 /* some sanity checks */ 6392 if (hdl == NULL || hdl->handler == NULL) return -EINVAL; 6393 6394 /* do the easy test first */ 6395 if (pfm_alt_intr_handler) return -EBUSY; 6396 6397 /* one at a time in the install or remove, just fail the others */ 6398 if (!spin_trylock(&pfm_alt_install_check)) { 6399 return -EBUSY; 6400 } 6401 6402 /* reserve our session */ 6403 for_each_online_cpu(reserve_cpu) { 6404 ret = pfm_reserve_session(NULL, 1, reserve_cpu); 6405 if (ret) goto cleanup_reserve; 
6406 } 6407 6408 /* save the current system wide pmu states */ 6409 ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); 6410 if (ret) { 6411 DPRINT(("on_each_cpu() failed: %d\n", ret)); 6412 goto cleanup_reserve; 6413 } 6414 6415 /* officially change to the alternate interrupt handler */ 6416 pfm_alt_intr_handler = hdl; 6417 6418 spin_unlock(&pfm_alt_install_check); 6419 6420 return 0; 6421 6422cleanup_reserve: 6423 for_each_online_cpu(i) { 6424 /* don't unreserve more than we reserved */ 6425 if (i >= reserve_cpu) break; 6426 6427 pfm_unreserve_session(NULL, 1, i); 6428 } 6429 6430 spin_unlock(&pfm_alt_install_check); 6431 6432 return ret; 6433} 6434EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); 6435 6436int 6437pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) 6438{ 6439 int i; 6440 int ret; 6441 6442 if (hdl == NULL) return -EINVAL; 6443 6444 /* cannot remove someone else's handler! */ 6445 if (pfm_alt_intr_handler != hdl) return -EINVAL; 6446 6447 /* one at a time in the install or remove, just fail the others */ 6448 if (!spin_trylock(&pfm_alt_install_check)) { 6449 return -EBUSY; 6450 } 6451 6452 pfm_alt_intr_handler = NULL; 6453 6454 ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); 6455 if (ret) { 6456 DPRINT(("on_each_cpu() failed: %d\n", ret)); 6457 } 6458 6459 for_each_online_cpu(i) { 6460 pfm_unreserve_session(NULL, 1, i); 6461 } 6462 6463 spin_unlock(&pfm_alt_install_check); 6464 6465 return 0; 6466} 6467EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); 6468 6469/* 6470 * perfmon initialization routine, called from the initcall() table 6471 */ 6472static int init_pfm_fs(void); 6473 6474static int __init 6475pfm_probe_pmu(void) 6476{ 6477 pmu_config_t **p; 6478 int family; 6479 6480 family = local_cpu_data->family; 6481 p = pmu_confs; 6482 6483 while(*p) { 6484 if ((*p)->probe) { 6485 if ((*p)->probe() == 0) goto found; 6486 } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { 6487 goto found; 6488 } 6489 p++; 6490 } 6491 return -1; 6492found: 6493 pmu_conf = *p; 6494 return 0; 6495} 6496 6497static const struct file_operations pfm_proc_fops = { 6498 .open = pfm_proc_open, 6499 .read = seq_read, 6500 .llseek = seq_lseek, 6501 .release = seq_release, 6502}; 6503 6504int __init 6505pfm_init(void) 6506{ 6507 unsigned int n, n_counters, i; 6508 6509 printk("perfmon: version %u.%u IRQ %u\n", 6510 PFM_VERSION_MAJ, 6511 PFM_VERSION_MIN, 6512 IA64_PERFMON_VECTOR); 6513 6514 if (pfm_probe_pmu()) { 6515 printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", 6516 local_cpu_data->family); 6517 return -ENODEV; 6518 } 6519 6520 /* 6521 * compute the number of implemented PMD/PMC from the 6522 * description tables 6523 */ 6524 n = 0; 6525 for (i=0; PMC_IS_LAST(i) == 0; i++) { 6526 if (PMC_IS_IMPL(i) == 0) continue; 6527 pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); 6528 n++; 6529 } 6530 pmu_conf->num_pmcs = n; 6531 6532 n = 0; n_counters = 0; 6533 for (i=0; PMD_IS_LAST(i) == 0; i++) { 6534 if (PMD_IS_IMPL(i) == 0) continue; 6535 pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); 6536 n++; 6537 if (PMD_IS_COUNTING(i)) n_counters++; 6538 } 6539 pmu_conf->num_pmds = n; 6540 pmu_conf->num_counters = n_counters; 6541 6542 /* 6543 * sanity checks on the number of debug registers 6544 */ 6545 if (pmu_conf->use_rr_dbregs) { 6546 if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { 6547 printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); 6548 pmu_conf = NULL; 6549 return -1; 6550 } 6551 if (pmu_conf->num_dbrs 
/*
 * this function is called before pfm_init()
 */
void
pfm_init_percpu (void)
{
	static int first_time=1;
	/*
	 * make sure no measurement is active
	 * (may inherit programmed PMCs from EFI).
	 */
	pfm_clear_psr_pp();
	pfm_clear_psr_up();

	/*
	 * we run with the PMU not frozen at all times
	 */
	pfm_unfreeze_pmu();

	if (first_time) {
		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
		first_time=0;
	}

	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}

/*
 * used for debug purposes only
 */
void
dump_pmu_state(const char *from)
{
	struct task_struct *task;
	struct pt_regs *regs;
	pfm_context_t *ctx;
	unsigned long psr, dcr, info, flags;
	int i, this_cpu;

	local_irq_save(flags);

	this_cpu = smp_processor_id();
	regs = task_pt_regs(current);
	info = PFM_CPUINFO_GET();
	dcr = ia64_getreg(_IA64_REG_CR_DCR);

	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
		local_irq_restore(flags);
		return;
	}

	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
		this_cpu,
		from,
		task_pid_nr(current),
		regs->cr_iip,
		current->comm);

	task = GET_PMU_OWNER();
	ctx  = GET_PMU_CTX();

	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);

	psr = pfm_get_psr();

	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n",
		this_cpu,
		ia64_get_pmc(0),
		psr & IA64_PSR_PP ? 1 : 0,
		psr & IA64_PSR_UP ? 1 : 0,
		dcr & IA64_DCR_PP ? 1 : 0,
		info,
		ia64_psr(regs)->up,
		ia64_psr(regs)->pp);

	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->pp = 0;

	/* ctx may be NULL when there is no owner: guard the thread copies */
	for (i=1; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n",
			this_cpu, i, ia64_get_pmc(i), i, ctx ? ctx->th_pmcs[i] : 0UL);
	}

	for (i=1; PMD_IS_LAST(i) == 0; i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n",
			this_cpu, i, ia64_get_pmd(i), i, ctx ? ctx->th_pmds[i] : 0UL);
	}

	if (ctx) {
		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p msgq_head=%d msgq_tail=%d saved_psr_up=0x%lx\n",
			this_cpu,
			ctx->ctx_state,
			ctx->ctx_smpl_vaddr,
			ctx->ctx_smpl_hdr,
			ctx->ctx_msgq_head,
			ctx->ctx_msgq_tail,
			ctx->ctx_saved_psr_up);
	}
	local_irq_restore(flags);
}
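/*
 * Illustrative usage (compiled out): dump_pmu_state() is meant to be
 * dropped at suspect points while debugging; the argument tags the
 * output with the call site:
 */
#if 0
	dump_pmu_state(__func__);
#endif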
/*
 * called from process.c:copy_thread(). task is new child.
 */
void
pfm_inherit(struct task_struct *task, struct pt_regs *regs)
{
	struct thread_struct *thread;

	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));

	thread = &task->thread;

	/*
	 * cut links inherited from parent (current)
	 */
	thread->pfm_context = NULL;

	PFM_SET_WORK_PENDING(task, 0);

	/*
	 * the psr bits are already set properly in copy_thread()
	 */
}
#else /* !CONFIG_PERFMON */
asmlinkage long
sys_perfmonctl (int fd, int cmd, void *arg, int count)
{
	return -ENOSYS;
}
#endif /* CONFIG_PERFMON */