/*
 * Netburst Performance Events (P4, old Xeon)
 *
 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
 *
 * For licencing details see kernel-base/COPYING
 */

#ifdef CONFIG_CPU_SUP_INTEL

#include <asm/perf_event_p4.h>

#define P4_CNTR_LIMIT 3
/*
 * array indices: 0,1 - HT threads, used with HT enabled cpus
 */
struct p4_event_bind {
	unsigned int opcode;		/* Event code and ESCR selector */
	unsigned int escr_msr[2];	/* ESCR MSR for this event */
	char cntr[2][P4_CNTR_LIMIT];	/* counter index (offset), -1 on absence */
};

struct p4_pebs_bind {
	unsigned int metric_pebs;
	unsigned int metric_vert;
};

/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
#define P4_GEN_PEBS_BIND(name, pebs, vert)			\
	[P4_PEBS_METRIC__##name] = {				\
		.metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,	\
		.metric_vert = vert,				\
	}

/*
 * note that P4_PEBS_ENABLE_UOP_TAG is always set here
 *
 * it is needed for mapping the P4_PEBS_CONFIG_METRIC_MASK bits of
 * an event configuration to the values which are to be written
 * into the MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
 * registers
 */
static struct p4_pebs_bind p4_pebs_bind_map[] = {
	P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,	0x0000001, 0x0000001),
	P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,	0x0000002, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_load_miss_retired,	0x0000004, 0x0000001),
	P4_GEN_PEBS_BIND(dtlb_store_miss_retired,	0x0000004, 0x0000002),
	P4_GEN_PEBS_BIND(dtlb_all_miss_retired,		0x0000004, 0x0000003),
	P4_GEN_PEBS_BIND(tagged_mispred_branch,		0x0018000, 0x0000010),
	P4_GEN_PEBS_BIND(mob_load_replay_retired,	0x0000200, 0x0000001),
	P4_GEN_PEBS_BIND(split_load_retired,		0x0000400, 0x0000001),
	P4_GEN_PEBS_BIND(split_store_retired,		0x0000400, 0x0000002),
};
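/*
 * A worked example (illustration only): the entry generated above for
 * P4_PEBS_METRIC__1stl_cache_load_miss_retired expands to
 *
 *	.metric_pebs = 0x0000001 | P4_PEBS_ENABLE_UOP_TAG,
 *	.metric_vert = 0x0000001,
 *
 * so when this metric is enabled, 0x0000001 (plus the uop-tag bit)
 * ends up in MSR_IA32_PEBS_ENABLE and 0x0000001 ends up in
 * MSR_P4_PEBS_MATRIX_VERT (see p4_pmu_enable_pebs() below).
 */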
static struct p4_event_bind p4_event_bind_map[] = {
	[P4_EVENT_TC_DELIVER_MODE] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
		.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_BPU_FETCH_REQUEST] = {
		.opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
		.escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_ITLB_REFERENCE] = {
		.opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
		.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_MEMORY_CANCEL] = {
		.opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
		.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MEMORY_COMPLETE] = {
		.opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_LOAD_PORT_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_STORE_PORT_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MOB_LOAD_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
		.escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_PAGE_WALK_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
		.escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_CACHE_REFERENCE] = {
		.opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ALLOCATION] = {
		.opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
		.escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
		.cntr = { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_FSB_DATA_ACTIVITY] = {
		.opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_ALLOCATION] = {		/* shared ESCR, broken CCCR1 */
		.opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
		.cntr = { {0, -1, -1}, {1, -1, -1} },
	},
	[P4_EVENT_BSQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
		.escr_msr = { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 },
		.cntr = { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_SSE_INPUT_ASSIST] = {
		.opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_SP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_DP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_SP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_DP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_64BIT_MMX_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_128BIT_MMX_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_X87_FP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_TC_MISC] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_MISC),
		.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_GLOBAL_POWER_EVENTS] = {
		.opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_TC_MS_XFER] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
		.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_UOP_QUEUE_WRITES] = {
		.opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
		.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_BRANCH_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RESOURCE_STALL] = {
		.opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
		.escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_WC_BUFFER] = {
		.opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
		.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_B2B_CYCLES] = {
		.opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BNR] = {
		.opcode = P4_OPCODE(P4_EVENT_BNR),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_SNOOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SNOOP),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_RESPONSE] = {
		.opcode = P4_OPCODE(P4_EVENT_RESPONSE),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_FRONT_END_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_EXECUTION_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_REPLAY_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOPS_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOP_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
		.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_BRANCH_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_X87_ASSIST] = {
		.opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MACHINE_CLEAR] = {
		.opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_COMPLETED] = {
		.opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
};
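/*
 * How a bind entry is consumed (a descriptive example, not extra
 * logic): with HT enabled, a thread-1 event for
 * P4_EVENT_TC_DELIVER_MODE is programmed through escr_msr[1]
 * (MSR_P4_TC_ESCR1) and may land on counter 6 or 7 (cntr[1]);
 * the -1 slots simply mean "no further alternative counter"
 * (see p4_next_cntr() below).
 */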
#define P4_GEN_CACHE_EVENT(event, bit, metric)				\
	p4_config_pack_escr(P4_ESCR_EVENT(event) |			\
			    P4_ESCR_EMASK_BIT(event, bit)) |		\
	p4_config_pack_cccr(metric |					\
			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))

static __initconst const u64 p4_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
	[ C(L1D ) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0x0,
			[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__1stl_cache_load_miss_retired),
		},
	},
	[ C(LL  ) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0x0,
			[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
		},
	},
	[ C(DTLB) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0x0,
			[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_load_miss_retired),
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = 0x0,
			[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_PEBS_METRIC__dtlb_store_miss_retired),
		},
	},
	[ C(ITLB) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
						P4_PEBS_METRIC__none),
			[ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
						P4_PEBS_METRIC__none),
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS)   ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS)   ] = -1,
		},
	},
};
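/*
 * A worked example (illustration only): the L1D read-miss entry above
 * expands to a config whose ESCR half selects P4_EVENT_REPLAY_EVENT
 * with the NBOGUS mask bit, and whose CCCR half carries both the
 * matching ESEL and P4_PEBS_METRIC__1stl_cache_load_miss_retired;
 * the latter is what p4_pmu_enable_pebs() later unpacks to program
 * MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT.
 */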
static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
	/* non-halted CPU clocks */
	[PERF_COUNT_HW_CPU_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),

	/*
	 * retired instructions:
	 * for the sake of simplicity we don't use the FSB tagging
	 */
	[PERF_COUNT_HW_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),

	/* cache hits */
	[PERF_COUNT_HW_CACHE_REFERENCES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),

	/* cache misses */
	[PERF_COUNT_HW_CACHE_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),

	/* branch instructions retired */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),

	/* mispredicted branches retired */
	[PERF_COUNT_HW_BRANCH_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),

	/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
	[PERF_COUNT_HW_BUS_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN))	|
	p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
};

static struct p4_event_bind *p4_config_get_bind(u64 config)
{
	unsigned int evnt = p4_config_unpack_event(config);
	struct p4_event_bind *bind = NULL;

	if (evnt < ARRAY_SIZE(p4_event_bind_map))
		bind = &p4_event_bind_map[evnt];

	return bind;
}

static u64 p4_pmu_event_map(int hw_event)
{
	struct p4_event_bind *bind;
	unsigned int esel;
	u64 config;

	config = p4_general_events[hw_event];
	bind = p4_config_get_bind(config);
	esel = P4_OPCODE_ESEL(bind->opcode);
	config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));

	return config;
}

static int p4_validate_raw_event(struct perf_event *event)
{
	unsigned int v;

	/* user data may contain an out-of-bounds event index */
	v = p4_config_unpack_event(event->attr.config);
	if (v >= ARRAY_SIZE(p4_event_bind_map)) {
		pr_warning("P4 PMU: Unknown event code: %d\n", v);
		return -EINVAL;
	}

	/*
	 * it may also carry invalid PEBS bits
	 */
	if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
		pr_warning("P4 PMU: PEBS is not supported yet\n");
		return -EINVAL;
	}

	v = p4_config_unpack_metric(event->attr.config);
	if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
		pr_warning("P4 PMU: Unknown metric code: %d\n", v);
		return -EINVAL;
	}

	return 0;
}

static int p4_hw_config(struct perf_event *event)
{
	int cpu = get_cpu();
	int rc = 0;
	u32 escr, cccr;

	/*
	 * the reason we grab the cpu this early is that if we get
	 * scheduled for the first time on the same cpu, we will not
	 * need to swap the thread-specific flags in the config
	 * (and will save some cpu cycles)
	 */

	cccr = p4_default_cccr_conf(cpu);
	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
					event->attr.exclude_user);
	event->hw.config = p4_config_pack_escr(escr) |
			   p4_config_pack_cccr(cccr);

	if (p4_ht_active() && p4_ht_thread(cpu))
		event->hw.config = p4_set_ht_bit(event->hw.config);

	if (event->attr.type == PERF_TYPE_RAW) {

		rc = p4_validate_raw_event(event);
		if (rc)
			goto out;

		event->hw.config |= event->attr.config &
			(p4_config_pack_escr(P4_ESCR_MASK_HT) |
			 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));

		event->hw.config &= ~P4_CCCR_FORCE_OVF;
	}

	rc = x86_setup_perfctr(event);
out:
	put_cpu();
	return rc;
}

static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
	int overflow = 0;
	u32 low, high;

	rdmsr(hwc->config_base + hwc->idx, low, high);

	/* we need to check the high bit for unflagged overflows */
	if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
		overflow = 1;
		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
			((u64)low) & ~P4_CCCR_OVF);
	}

	return overflow;
}

static void p4_pmu_disable_pebs(void)
{
}
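/*
 * Deliberately a stub (as far as this code goes): architectural PEBS
 * is rejected in p4_validate_raw_event(), and the metric MSRs written
 * by p4_pmu_enable_pebs() are simply reprogrammed by the next enable,
 * so nothing has to be cleared on the disable path here.
 */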
static inline void p4_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * If the event gets disabled while the counter is in an
	 * overflowed state we need to clear P4_CCCR_OVF, otherwise
	 * the interrupt gets asserted again and again
	 */
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(u64)(p4_config_unpack_cccr(hwc->config)) &
			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}

static void p4_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_disable_event(event);
	}

	p4_pmu_disable_pebs();
}

/* the configuration must be valid */
static void p4_pmu_enable_pebs(u64 config)
{
	struct p4_pebs_bind *bind;
	unsigned int idx;

	BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);

	idx = p4_config_unpack_metric(config);
	if (idx == P4_PEBS_METRIC__none)
		return;

	bind = &p4_pebs_bind_map[idx];

	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}

static void p4_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int thread = p4_ht_config_thread(hwc->config);
	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
	unsigned int idx = p4_config_unpack_event(hwc->config);
	struct p4_event_bind *bind;
	u64 escr_addr, cccr;

	bind = &p4_event_bind_map[idx];
	escr_addr = (u64)bind->escr_msr[thread];

	/*
	 * - we don't support cascaded counters yet
	 * - and counter 1 is broken (erratum)
	 */
	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
	WARN_ON_ONCE(hwc->idx == 1);

	/* we need a real Event value */
	escr_conf &= ~P4_ESCR_EVENT_MASK;
	escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));

	cccr = p4_config_unpack_cccr(hwc->config);

	/*
	 * it could be a cache event, so we need to write the metrics
	 * into the additional MSRs
	 */
	p4_pmu_enable_pebs(hwc->config);

	(void)checking_wrmsrl(escr_addr, escr_conf);
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
		(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
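/*
 * A note on the ordering above (descriptive only): the PEBS metric
 * MSRs are programmed first, then the ESCR, and the CCCR is written
 * last with P4_CCCR_ENABLE set, so the counter only starts counting
 * once the whole event description is in place.
 */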
static void p4_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_event *event = cpuc->events[idx];
		if (!test_bit(idx, cpuc->active_mask))
			continue;
		p4_pmu_enable_event(event);
	}
}

static int p4_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	data.addr = 0;
	data.raw = NULL;

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		int overflow;

		if (!test_bit(idx, cpuc->active_mask)) {
			/* catch in-flight IRQs */
			if (__test_and_clear_bit(idx, cpuc->running))
				handled++;
			continue;
		}

		event = cpuc->events[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		/* it might be an unflagged overflow */
		overflow = p4_pmu_clear_cccr_ovf(hwc);

		val = x86_perf_event_update(event);
		if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
			continue;

		handled += overflow;

		/* event overflow for sure */
		data.period = event->hw.last_period;

		if (!x86_perf_event_set_period(event))
			continue;
		if (perf_event_overflow(event, 1, &data, regs))
			p4_pmu_disable_event(event);
	}

	if (handled) {
		/* p4 quirk: unmask it again */
		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
		inc_irq_stat(apic_perf_irqs);
	}

	return handled;
}

/*
 * swap thread-specific fields according to the thread
 * we are going to run on
 */
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
{
	u32 escr, cccr;

	/*
	 * either we are lucky and continue on the same cpu,
	 * or there is no HT support at all
	 */
	if (!p4_should_swap_ts(hwc->config, cpu))
		return;

	/*
	 * the event was migrated from another logical cpu,
	 * so we need to swap the thread-specific flags
	 */

	escr = p4_config_unpack_escr(hwc->config);
	cccr = p4_config_unpack_cccr(hwc->config);

	if (p4_ht_thread(cpu)) {
		cccr &= ~P4_CCCR_OVF_PMI_T0;
		cccr |= P4_CCCR_OVF_PMI_T1;
		if (escr & P4_ESCR_T0_OS) {
			escr &= ~P4_ESCR_T0_OS;
			escr |= P4_ESCR_T1_OS;
		}
		if (escr & P4_ESCR_T0_USR) {
			escr &= ~P4_ESCR_T0_USR;
			escr |= P4_ESCR_T1_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config |= P4_CONFIG_HT;
	} else {
		cccr &= ~P4_CCCR_OVF_PMI_T1;
		cccr |= P4_CCCR_OVF_PMI_T0;
		if (escr & P4_ESCR_T1_OS) {
			escr &= ~P4_ESCR_T1_OS;
			escr |= P4_ESCR_T0_OS;
		}
		if (escr & P4_ESCR_T1_USR) {
			escr &= ~P4_ESCR_T1_USR;
			escr |= P4_ESCR_T0_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config &= ~P4_CONFIG_HT;
	}
}

/*
 * ESCR address hashing is tricky: ESCRs are not sequential in memory,
 * but they all start from MSR_P4_BSU_ESCR0 (0x3a0) and every ESCR
 * address lies within the range [0x3a0, 0x3e1],
 *
 * so we end up with a ~70% filled hashtable
 */

#define P4_ESCR_MSR_BASE		0x000003a0
#define P4_ESCR_MSR_MAX			0x000003e1
#define P4_ESCR_MSR_TABLE_SIZE		(P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
#define P4_ESCR_MSR_IDX(msr)		(msr - P4_ESCR_MSR_BASE)
#define P4_ESCR_MSR_TABLE_ENTRY(msr)	[P4_ESCR_MSR_IDX(msr)] = msr
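/*
 * For illustration: P4_ESCR_MSR_IDX(MSR_P4_BSU_ESCR0) is
 * 0x3a0 - 0x3a0 = 0, and the table spans 0x42 (66) slots in total;
 * the 46 entries populated below give the ~70% fill mentioned above.
 */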
static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
};

static int p4_get_escr_idx(unsigned int addr)
{
	unsigned int idx = P4_ESCR_MSR_IDX(addr);

	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE ||
			!p4_escr_table[idx] ||
			p4_escr_table[idx] != addr)) {
		WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
		return -1;
	}

	return idx;
}

static int p4_next_cntr(int thread, unsigned long *used_mask,
			struct p4_event_bind *bind)
{
	int i, j;

	for (i = 0; i < P4_CNTR_LIMIT; i++) {
		j = bind->cntr[thread][i];
		if (j != -1 && !test_bit(j, used_mask))
			return j;
	}

	return -1;
}

static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
	int cpu = smp_processor_id();
	struct hw_perf_event *hwc;
	struct p4_event_bind *bind;
	unsigned int i, thread, num;
	int cntr_idx, escr_idx;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
	bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);

	for (i = 0, num = n; i < n; i++, num--) {

		hwc = &cpuc->event_list[i]->hw;
		thread = p4_ht_thread(cpu);
		bind = p4_config_get_bind(hwc->config);
		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
		if (unlikely(escr_idx == -1))
			goto done;

		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
			cntr_idx = hwc->idx;
			if (assign)
				assign[i] = hwc->idx;
			goto reserve;
		}

		cntr_idx = p4_next_cntr(thread, used_mask, bind);
		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
			goto done;

		p4_pmu_swap_config_ts(hwc, cpu);
		if (assign)
			assign[i] = cntr_idx;
reserve:
		set_bit(cntr_idx, used_mask);
		set_bit(escr_idx, escr_mask);
	}

done:
	return num ? -ENOSPC : 0;
}
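/*
 * A sketch of the constraint being enforced above (illustration
 * only): if two events on the same thread both bind to, say,
 * MSR_P4_FSB_ESCR0, the second one finds escr_idx already set in
 * escr_mask and the loop bails out with -ENOSPC, even though a free
 * counter may still remain in used_mask.
 */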
static __initconst const struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,
	.eventsel		= MSR_P4_BPU_CCCR0,
	.perfctr		= MSR_P4_BPU_PERFCTR0,
	.event_map		= p4_pmu_event_map,
	.max_events		= ARRAY_SIZE(p4_general_events),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously;
	 * for the moment leave it restricted, assuming
	 * HT is on
	 */
	.num_counters		= ARCH_P4_MAX_CCCR,
	.apic			= 1,
	.cntval_bits		= 40,
	.cntval_mask		= (1ULL << 40) - 1,
	.max_period		= (1ULL << 39) - 1,
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,
	.perfctr_second_write	= 1,
};

static __init int p4_pmu_init(void)
{
	unsigned int low, high;

	/* If we get stripped -- indexing fails */
	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!(low & (1 << 7))) {
		pr_cont("unsupported Netburst CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
		sizeof(hw_cache_event_ids));

	pr_cont("Netburst events, ");

	x86_pmu = p4_pmu;

	return 0;
}

#endif /* CONFIG_CPU_SUP_INTEL */