#ifdef CONFIG_CPU_SUP_AMD

static DEFINE_RAW_SPINLOCK(amd_nb_lock);

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
		[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
		[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
		[ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
		[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
  [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return 0;
}
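/*
 * The NorthBridge bookkeeping below relies on struct amd_nb and on the
 * cpuc->amd_nb pointer, both provided outside this file by the shared
 * x86 perf_event code. A sketch of the fields as used here (inferred
 * from usage, not the authoritative definition):
 *
 *	nb_id			- NorthBridge id, -1 while unassigned
 *	refcnt			- number of cores sharing the structure
 *	owners[]		- per-counter owning event, claimed/released via cmpxchg()
 *	event_constraints[]	- one single-counter constraint per slot
 */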
/*
 * AMD64 events are detected based on their event codes.
 */
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * only care about NB events
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return;

	/*
	 * need to scan whole list because event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because event can only
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (nb->owners[i] == event) {
			cmpxchg(nb->owners+i, event, NULL);
			break;
		}
	}
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12.
 *
 * NB events are events measuring L3 cache and HyperTransport
 * traffic. They are identified by an event code >= 0xe0.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When a NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the
 * same counters to host NB events; this is why we use atomic
 * ops. Some multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling amd_put_event_constraints().
 *
 * Non NB events are not impacted by this restriction.
 */
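/*
 * Note: "unconstrained" (any counter may be used) and "emptyconstraint"
 * (no counter usable, so scheduling of the event eventually fails) are
 * the generic constraint objects from the shared x86 perf_event code;
 * they are only referenced here, not defined in this file.
 */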
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old = NULL;
	int max = x86_pmu.num_counters;
	int i, j, k = -1;

	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return &unconstrained;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for (i = 0; i < max; i++) {
		/*
		 * keep track of first free slot
		 */
		if (k == -1 && !nb->owners[i])
			k = i;

		/* already present, reuse */
		if (nb->owners[i] == event)
			goto done;
	}
	/*
	 * not present, so grab a new slot
	 * starting either at:
	 */
	if (hwc->idx != -1) {
		/* previous assignment */
		i = hwc->idx;
	} else if (k != -1) {
		/* start from free slot found */
		i = k;
	} else {
		/*
		 * event not found, no slot found in
		 * first pass, try again from the
		 * beginning
		 */
		i = 0;
	}
	j = i;
	do {
		old = cmpxchg(nb->owners+i, NULL, event);
		if (!old)
			break;
		if (++i == max)
			i = 0;
	} while (i != j);
done:
	if (!old)
		return &nb->event_constraints[i];

	return &emptyconstraint;
}

static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
{
	struct amd_nb *nb;
	int i;

	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
	if (!nb)
		return NULL;

	memset(nb, 0, sizeof(*nb));
	nb->nb_id = nb_id;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (boot_cpu_data.x86_max_cores < 2)
		return NOTIFY_OK;

	cpuc->amd_nb = amd_alloc_nb(cpu, -1);
	if (!cpuc->amd_nb)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}

static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct amd_nb *nb;
	int i, nb_id;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	raw_spin_lock(&amd_nb_lock);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			kfree(cpuc->amd_nb);
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;

	raw_spin_unlock(&amd_nb_lock);
}

static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	raw_spin_lock(&amd_nb_lock);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}

	raw_spin_unlock(&amd_nb_lock);
}

static __initconst
const struct x86_pmu amd_pmu = {
	.name = "AMD",
	.handle_irq = x86_pmu_handle_irq,
	.disable_all = x86_pmu_disable_all,
	.enable_all = x86_pmu_enable_all,
	.enable = x86_pmu_enable_event,
	.disable = x86_pmu_disable_event,
	.hw_config = amd_pmu_hw_config,
	.schedule_events = x86_schedule_events,
	.eventsel = MSR_K7_EVNTSEL0,
	.perfctr = MSR_K7_PERFCTR0,
	.event_map = amd_pmu_event_map,
	.max_events = ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters = 4,
	.cntval_bits = 48,
	.cntval_mask = (1ULL << 48) - 1,
	.apic = 1,
	/* use highest bit to detect overflow */
	.max_period = (1ULL << 47) - 1,
	.get_event_constraints = amd_get_event_constraints,
	.put_event_constraints = amd_put_event_constraints,

	.cpu_prepare = amd_pmu_cpu_prepare,
	.cpu_starting = amd_pmu_cpu_starting,
	.cpu_dead = amd_pmu_cpu_dead,
};

static __init int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)
{
	return 0;
}

#endif