// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>

#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "uncore.h"
#include "uncore_discovery.h"

static bool uncore_no_discover;
module_param(uncore_no_discover, bool, 0);
MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
				     "(default: enable the discovery mechanism).");
struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* The PCI driver for the device which the uncore doesn't own. */
struct pci_driver *uncore_pci_sub_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
int __uncore_max_dies;

/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
	EVENT_CONSTRAINT(0, 0, 0);

MODULE_LICENSE("GPL");

int uncore_pcibus_to_dieid(struct pci_bus *bus)
{
	struct pci2phy_map *map;
	int die_id = -1;

	raw_spin_lock(&pci2phy_map_lock);
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == pci_domain_nr(bus)) {
			die_id = map->pbus_to_dieid[bus->number];
			break;
		}
	}
	raw_spin_unlock(&pci2phy_map_lock);

	return die_id;
}

int uncore_die_to_segment(int die)
{
	struct pci_bus *bus = NULL;

	/* Find the first PCI bus that belongs to the specified die. */
	while ((bus = pci_find_next_bus(bus)) &&
	       (die != uncore_pcibus_to_dieid(bus)))
		;

	return bus ? pci_domain_nr(bus) : -EINVAL;
}

int uncore_device_to_die(struct pci_dev *dev)
{
	int node = pcibus_to_node(dev->bus);
	int cpu;

	for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
		struct cpuinfo_x86 *c = &cpu_data(cpu);

		if (c->initialized && cpu_to_node(cpu) == node)
			return c->topo.logical_die_id;
	}

	return -1;
}

static void uncore_free_pcibus_map(void)
{
	struct pci2phy_map *map, *tmp;

	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
		list_del(&map->list);
		kfree(map);
	}
}

struct pci2phy_map *__find_pci2phy_map(int segment)
{
	struct pci2phy_map *map, *alloc = NULL;
	int i;

	lockdep_assert_held(&pci2phy_map_lock);

lookup:
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == segment)
			goto end;
	}

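	/*
	 * Not found: drop the map lock around the GFP_KERNEL allocation and
	 * retry the lookup afterwards, since another CPU may have inserted
	 * the entry for this segment in the meantime.
	 */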
	if (!alloc) {
		raw_spin_unlock(&pci2phy_map_lock);
		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
		raw_spin_lock(&pci2phy_map_lock);

		if (!alloc)
			return NULL;

		goto lookup;
	}

	map = alloc;
	alloc = NULL;
	map->segment = segment;
	for (i = 0; i < 256; i++)
		map->pbus_to_dieid[i] = -1;
	list_add_tail(&map->list, &pci2phy_map_head);

end:
	kfree(alloc);
	return map;
}

ssize_t uncore_event_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct uncore_event_desc *event =
		container_of(attr, struct uncore_event_desc, attr);
	return sprintf(buf, "%s", event->config);
}

struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	unsigned int dieid = topology_logical_die_id(cpu);

	/*
	 * The unsigned check also catches the '-1' return value for
	 * non-existent mappings in the topology map.
	 */
	return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
}

u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;

	rdmsrl(event->hw.event_base, count);

	return count;
}

void uncore_mmio_exit_box(struct intel_uncore_box *box)
{
	if (box->io_addr)
		iounmap(box->io_addr);
}

u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
			     struct perf_event *event)
{
	if (!box->io_addr)
		return 0;

	if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
		return 0;

	return readq(box->io_addr + event->hw.event_base);
}

/*
 * generic get constraint function for shared match/mask registers.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
	unsigned long flags;
	bool ok = false;

	/*
	 * reg->alloc can be set due to existing state, so for a fake box we
	 * need to ignore it; otherwise we might fail to allocate proper fake
	 * state for this extra reg constraint.
	 */
	if (reg1->idx == EXTRA_REG_NONE ||
	    (!uncore_box_is_fake(box) && reg1->alloc))
		return NULL;

	er = &box->shared_regs[reg1->idx];
	raw_spin_lock_irqsave(&er->lock, flags);
	if (!atomic_read(&er->ref) ||
	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
		atomic_inc(&er->ref);
		er->config1 = reg1->config;
		er->config2 = reg2->config;
		ok = true;
	}
	raw_spin_unlock_irqrestore(&er->lock, flags);

	if (ok) {
		if (!uncore_box_is_fake(box))
			reg1->alloc = 1;
		return NULL;
	}

	return &uncore_constraint_empty;
}

void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

	/*
	 * Only put the constraint if the extra reg was actually allocated.
	 * This also takes care of events which do not use an extra shared reg.
	 *
	 * Also, if this is a fake box we shouldn't touch any event state
	 * (reg->alloc) and we don't care about leaving inconsistent box
	 * state either, since it will be thrown out.
	 */
	if (uncore_box_is_fake(box) || !reg1->alloc)
		return;

	er = &box->shared_regs[reg1->idx];
	atomic_dec(&er->ref);
	reg1->alloc = 0;
}

u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
	struct intel_uncore_extra_reg *er;
	unsigned long flags;
	u64 config;

	er = &box->shared_regs[idx];

	raw_spin_lock_irqsave(&er->lock, flags);
	config = er->config;
	raw_spin_unlock_irqrestore(&er->lock, flags);

	return config;
}

static void uncore_assign_hw_event(struct intel_uncore_box *box,
				   struct perf_event *event, int idx)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->last_tag = ++box->tags[idx];

	if (uncore_pmc_fixed(hwc->idx)) {
		hwc->event_base = uncore_fixed_ctr(box);
		hwc->config_base = uncore_fixed_ctl(box);
		return;
	}

	hwc->config_base = uncore_event_ctl(box, hwc->idx);
	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
}

void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	if (uncore_pmc_freerunning(event->hw.idx))
		shift = 64 - uncore_freerunning_bits(box, event);
	else if (uncore_pmc_fixed(event->hw.idx))
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

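	/*
	 * Shift out the bits above the counter width so that a counter
	 * wrap-around still produces the correct unsigned delta.
	 */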
	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP and broken for
 * SandyBridge, so we use an hrtimer to periodically poll the counters and
 * avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
	struct intel_uncore_box *box;
	struct perf_event *event;
	unsigned long flags;
	int bit;

	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
	if (!box->n_active || box->cpu != smp_processor_id())
		return HRTIMER_NORESTART;
	/*
	 * Disable local interrupts to prevent uncore_pmu_event_start/stop
	 * from interrupting the update process.
	 */
	local_irq_save(flags);

	/*
	 * handle boxes with an active event list as opposed to active
	 * counters
	 */
	list_for_each_entry(event, &box->active_list, active_entry) {
		uncore_perf_event_update(box, event);
	}

	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
		uncore_perf_event_update(box, box->events[bit]);

	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
	return HRTIMER_RESTART;
}

void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
		      HRTIMER_MODE_REL_PINNED);
}

void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	box->hrtimer.function = uncore_pmu_hrtimer;
}

static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
						 int node)
{
	int i, size, numshared = type->num_shared_regs;
	struct intel_uncore_box *box;

	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);

	box = kzalloc_node(size, GFP_KERNEL, node);
	if (!box)
		return NULL;

	for (i = 0; i < numshared; i++)
		raw_spin_lock_init(&box->shared_regs[i].lock);

	uncore_pmu_init_hrtimer(box);
	box->cpu = -1;
	box->dieid = -1;

	/* set default hrtimer timeout */
	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

	INIT_LIST_HEAD(&box->active_list);

	return box;
}

/*
 * Using uncore_pmu_event_init pmu event_init callback
 * as a detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
{
	return &box->pmu->pmu == event->pmu;
}

static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
		      bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = box->pmu->type->num_counters;
	if (box->pmu->type->fixed_ctl)
		max_count++;

	if (box->n_events >= max_count)
		return -EINVAL;

	n = box->n_events;

	if (is_box_event(box, leader)) {
		box->event_list[n] = leader;
		n++;
	}

	if (!dogrp)
		return n;

	for_each_sibling_event(event, leader) {
		if (!is_box_event(box, event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		box->event_list[n] = event;
		n++;
	}
	return n;
}

static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_type *type = box->pmu->type;
	struct event_constraint *c;

	if (type->ops->get_constraint) {
		c = type->ops->get_constraint(box, event);
		if (c)
			return c;
	}

	if (event->attr.config == UNCORE_FIXED_EVENT)
		return &uncore_constraint_fixed;

	if (type->constraints) {
		for_each_event_constraint(c, type->constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &type->unconstrainted;
}

static void uncore_put_event_constraint(struct intel_uncore_box *box,
					struct perf_event *event)
{
	if (box->pmu->type->ops->put_constraint)
		box->pmu->type->ops->put_constraint(box, event);
}

static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c;
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

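	/*
	 * Collect the constraint for each event and track the min/max
	 * constraint weight for perf_assign_events().
	 */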
	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		box->event_constraint[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = box->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	/* slow path */
	if (i != n)
		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);

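	/*
	 * Group validation passes assign == NULL; in that case, or when
	 * scheduling failed, release the constraints taken above.
	 */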
	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}

void uncore_pmu_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
		return;

	/*
	 * A free running counter is read-only and always active.
	 * Use the current counter value as the start point.
	 * There is no overflow interrupt for free running counters,
	 * so use the hrtimer to periodically poll the counter and
	 * avoid overflow.
	 */
	if (uncore_pmc_freerunning(event->hw.idx)) {
		list_add_tail(&event->active_entry, &box->active_list);
		local64_set(&event->hw.prev_count,
			    uncore_read_counter(box, event));
		if (box->n_active++ == 0)
			uncore_pmu_start_hrtimer(box);
		return;
	}

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	event->hw.state = 0;
	box->events[idx] = event;
	box->n_active++;
	__set_bit(idx, box->active_mask);

	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
	uncore_enable_event(box, event);

	if (box->n_active == 1)
		uncore_pmu_start_hrtimer(box);
}

void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	/* Cannot disable free running counter which is read-only */
	if (uncore_pmc_freerunning(hwc->idx)) {
		list_del(&event->active_entry);
		if (--box->n_active == 0)
			uncore_pmu_cancel_hrtimer(box);
		uncore_perf_event_update(box, event);
		return;
	}

	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
		uncore_disable_event(box, event);
		box->n_active--;
		box->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		if (box->n_active == 0)
			uncore_pmu_cancel_hrtimer(box);
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

int uncore_pmu_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;
	int assign[UNCORE_PMC_IDX_MAX];
	int i, n, ret;

	if (!box)
		return -ENODEV;

	/*
	 * The free running counter is assigned in event_init().
	 * The free running counter event and free running counter
	 * are 1:1 mapped. It doesn't need to be tracked in event_list.
	 */
	if (uncore_pmc_freerunning(hwc->idx)) {
		if (flags & PERF_EF_START)
			uncore_pmu_event_start(event, 0);
		return 0;
	}

	ret = n = uncore_collect_events(box, event, false);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	ret = uncore_assign_events(box, assign, n);
	if (ret)
		return ret;

	/* save events moving to new counters */
	for (i = 0; i < box->n_events; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx == assign[i] &&
			hwc->last_tag == box->tags[assign[i]])
			continue;
		/*
		 * Ensure we don't accidentally enable a stopped
		 * counter simply because we rescheduled.
		 */
		if (hwc->state & PERF_HES_STOPPED)
			hwc->state |= PERF_HES_ARCH;

		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
	}

	/* reprogram moved events into new counters */
	for (i = 0; i < n; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx != assign[i] ||
			hwc->last_tag != box->tags[assign[i]])
			uncore_assign_hw_event(box, event, assign[i]);
		else if (i < box->n_events)
			continue;

		if (hwc->state & PERF_HES_ARCH)
			continue;

		uncore_pmu_event_start(event, 0);
	}
	box->n_events = n;

	return 0;
}

void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	/*
	 * The event for a free running counter is not tracked in event_list.
	 * There is no need to force event->hw.idx to -1 to reassign the
	 * counter, because the event and the free running counter are 1:1
	 * mapped.
	 */
	if (uncore_pmc_freerunning(event->hw.idx))
		return;

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

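			/* Compact event_list over the removed slot. */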
			for (++i; i < box->n_events; i++)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}

void uncore_pmu_event_read(struct perf_event *event)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
				struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;

	/* The free running counter is always active. */
	if (uncore_pmc_freerunning(event->hw.idx))
		return 0;

	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;

	fake_box->pmu = pmu;
	/*
	 * The event is not yet connected with its siblings, therefore we
	 * must first collect the existing siblings and then add the new
	 * event before we can simulate the scheduling.
	 */
	n = uncore_collect_events(fake_box, leader, true);
	if (n < 0)
		goto out;

	fake_box->n_events = n;
	n = uncore_collect_events(fake_box, event, false);
	if (n < 0)
		goto out;

	fake_box->n_events = n;

	ret = uncore_assign_events(fake_box, NULL, n);
out:
	kfree(fake_box);
	return ret;
}

static int uncore_pmu_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (pmu->func_id < 0)
		return -ENOENT;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;
	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;
	event->cpu = box->cpu;
	event->pmu_private = box;

	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;

	if (event->attr.config == UNCORE_FIXED_EVENT) {
		/* no fixed counter */
		if (!pmu->type->fixed_ctl)
			return -EINVAL;
		/*
		 * if there is only one fixed counter, only the first pmu
		 * can access the fixed counter
		 */
		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
			return -EINVAL;

		/* fixed counters have event field hardcoded to zero */
		hwc->config = 0ULL;
	} else if (is_freerunning_event(event)) {
		hwc->config = event->attr.config;
		if (!check_valid_freerunning_event(box, event))
			return -EINVAL;
		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
		/*
		 * The free running counter event and free running counter
		 * are always 1:1 mapped.
		 * The free running counter is always active.
		 * Assign the free running counter here.
		 */
		event->hw.event_base = uncore_freerunning_counter(box, event);
	} else {
		hwc->config = event->attr.config &
			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
		if (pmu->type->ops->hw_config) {
			ret = pmu->type->ops->hw_config(box, event);
			if (ret)
				return ret;
		}
	}

	if (event->group_leader != event)
		ret = uncore_validate_group(pmu, event);
	else
		ret = 0;

	return ret;
}

static void uncore_pmu_enable(struct pmu *pmu)
{
	struct intel_uncore_pmu *uncore_pmu;
	struct intel_uncore_box *box;

	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);

	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
	if (!box)
		return;

	if (uncore_pmu->type->ops->enable_box)
		uncore_pmu->type->ops->enable_box(box);
}

static void uncore_pmu_disable(struct pmu *pmu)
{
	struct intel_uncore_pmu *uncore_pmu;
	struct intel_uncore_box *box;

	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);

	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
	if (!box)
		return;

	if (uncore_pmu->type->ops->disable_box)
		uncore_pmu->type->ops->disable_box(box);
}

static ssize_t uncore_get_attr_cpumask(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group uncore_pmu_attr_group = {
	.attrs = uncore_pmu_attrs,
};

static inline int uncore_get_box_id(struct intel_uncore_type *type,
				    struct intel_uncore_pmu *pmu)
{
	return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx;
}

void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
	struct intel_uncore_type *type = pmu->type;

	if (type->num_boxes == 1)
		sprintf(pmu_name, "uncore_type_%u", type->type_id);
	else {
		sprintf(pmu_name, "uncore_type_%u_%d",
			type->type_id, uncore_get_box_id(type, pmu));
	}
}

static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
{
	struct intel_uncore_type *type = pmu->type;

	/*
	 * There is no uncore block name in the discovery table.
	 * Use uncore_type_<typeid>_<boxid> as the name.
	 */
	if (!type->name) {
		uncore_get_alias_name(pmu->name, pmu);
		return;
	}

	if (type->num_boxes == 1) {
		if (strlen(type->name) > 0)
			sprintf(pmu->name, "uncore_%s", type->name);
		else
			sprintf(pmu->name, "uncore");
	} else {
		/*
		 * Use the box ID from the discovery table if applicable.
		 */
		sprintf(pmu->name, "uncore_%s_%d", type->name,
			uncore_get_box_id(type, pmu));
	}
}

static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
	int ret;

	if (!pmu->type->pmu) {
		pmu->pmu = (struct pmu) {
			.attr_groups	= pmu->type->attr_groups,
			.task_ctx_nr	= perf_invalid_context,
			.pmu_enable	= uncore_pmu_enable,
			.pmu_disable	= uncore_pmu_disable,
			.event_init	= uncore_pmu_event_init,
			.add		= uncore_pmu_event_add,
			.del		= uncore_pmu_event_del,
			.start		= uncore_pmu_event_start,
			.stop		= uncore_pmu_event_stop,
			.read		= uncore_pmu_event_read,
			.module		= THIS_MODULE,
			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
			.attr_update	= pmu->type->attr_update,
		};
	} else {
		pmu->pmu = *pmu->type->pmu;
		pmu->pmu.attr_groups = pmu->type->attr_groups;
		pmu->pmu.attr_update = pmu->type->attr_update;
	}

	uncore_get_pmu_name(pmu);

	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
	if (!ret)
		pmu->registered = true;
	return ret;
}

static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
{
	if (!pmu->registered)
		return;
	perf_pmu_unregister(&pmu->pmu);
	pmu->registered = false;
}

static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
	int die;

	for (die = 0; die < uncore_max_dies(); die++)
		kfree(pmu->boxes[die]);
	kfree(pmu->boxes);
}

static void uncore_type_exit(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmu = type->pmus;
	int i;

	if (type->cleanup_mapping)
		type->cleanup_mapping(type);

	if (pmu) {
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			uncore_pmu_unregister(pmu);
			uncore_free_boxes(pmu);
		}
		kfree(type->pmus);
		type->pmus = NULL;
	}
	if (type->box_ids) {
		kfree(type->box_ids);
		type->box_ids = NULL;
	}
	kfree(type->events_group);
	type->events_group = NULL;
}

static void uncore_types_exit(struct intel_uncore_type **types)
{
	for (; *types; types++)
		uncore_type_exit(*types);
}

static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
{
	struct intel_uncore_pmu *pmus;
	size_t size;
	int i, j;

	pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
	if (!pmus)
		return -ENOMEM;

	size = uncore_max_dies() * sizeof(struct intel_uncore_box *);

	for (i = 0; i < type->num_boxes; i++) {
		pmus[i].func_id	= setid ? i : -1;
		pmus[i].pmu_idx	= i;
		pmus[i].type	= type;
		pmus[i].boxes	= kzalloc(size, GFP_KERNEL);
		if (!pmus[i].boxes)
			goto err;
	}

	type->pmus = pmus;
	type->unconstrainted = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
				0, type->num_counters, 0, 0);

	if (type->event_descs) {
		struct {
			struct attribute_group group;
			struct attribute *attrs[];
		} *attr_group;
		for (i = 0; type->event_descs[i].attr.attr.name; i++);

		attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
								GFP_KERNEL);
		if (!attr_group)
			goto err;

		attr_group->group.name = "events";
		attr_group->group.attrs = attr_group->attrs;

		for (j = 0; j < i; j++)
			attr_group->attrs[j] = &type->event_descs[j].attr.attr;

		type->events_group = &attr_group->group;
	}

	type->pmu_group = &uncore_pmu_attr_group;

	if (type->set_mapping)
		type->set_mapping(type);

	return 0;

err:
	for (i = 0; i < type->num_boxes; i++)
		kfree(pmus[i].boxes);
	kfree(pmus);

	return -ENOMEM;
}

static int __init
uncore_types_init(struct intel_uncore_type **types, bool setid)
{
	int ret;

	for (; *types; types++) {
		ret = uncore_type_init(*types, setid);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * Get the die information of a PCI device.
 * @pdev: The PCI device.
 * @die: The die id which the device maps to.
 */
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
{
	*die = uncore_pcibus_to_dieid(pdev->bus);
	if (*die < 0)
		return -EINVAL;

	return 0;
}

static struct intel_uncore_pmu *
uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
{
	struct intel_uncore_type **types = uncore_pci_uncores;
	struct intel_uncore_type *type;
	u64 box_ctl;
	int i, die;

	for (; *types; types++) {
		type = *types;
		for (die = 0; die < __uncore_max_dies; die++) {
			for (i = 0; i < type->num_boxes; i++) {
				if (!type->box_ctls[die])
					continue;
				box_ctl = type->box_ctls[die] + type->pci_offsets[i];
				if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
				    pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
				    pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
					return &type->pmus[i];
			}
		}
	}

	return NULL;
}

/*
 * Find the PMU of a PCI device.
 * @pdev: The PCI device.
 * @ids: The ID table of the available PCI devices with a PMU.
 *       If NULL, search the whole uncore_pci_uncores.
 */
static struct intel_uncore_pmu *
uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
{
	struct intel_uncore_pmu *pmu = NULL;
	struct intel_uncore_type *type;
	kernel_ulong_t data;
	unsigned int devfn;

	if (!ids)
		return uncore_pci_find_dev_pmu_from_types(pdev);

	while (ids && ids->vendor) {
		if ((ids->vendor == pdev->vendor) &&
		    (ids->device == pdev->device)) {
			data = ids->driver_data;
			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
					  UNCORE_PCI_DEV_FUNC(data));
			if (devfn == pdev->devfn) {
				type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
				pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
				break;
			}
		}
		ids++;
	}
	return pmu;
}

/*
 * Register the PMU for a PCI device
 * @pdev: The PCI device.
 * @type: The corresponding PMU type of the device.
 * @pmu: The corresponding PMU of the device.
 * @die: The die id which the device maps to.
 */
static int uncore_pci_pmu_register(struct pci_dev *pdev,
				   struct intel_uncore_type *type,
				   struct intel_uncore_pmu *pmu,
				   int die)
{
	struct intel_uncore_box *box;
	int ret;

	if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
		return -EINVAL;

	box = uncore_alloc_box(type, NUMA_NO_NODE);
	if (!box)
		return -ENOMEM;

	if (pmu->func_id < 0)
		pmu->func_id = pdev->devfn;
	else
		WARN_ON_ONCE(pmu->func_id != pdev->devfn);

	atomic_inc(&box->refcnt);
	box->dieid = die;
	box->pci_dev = pdev;
	box->pmu = pmu;
	uncore_box_init(box);

	pmu->boxes[die] = box;
	if (atomic_inc_return(&pmu->activeboxes) > 1)
		return 0;

	/* First active box registers the pmu */
	ret = uncore_pmu_register(pmu);
	if (ret) {
		pmu->boxes[die] = NULL;
		uncore_box_exit(box);
		kfree(box);
	}
	return ret;
}

/*
 * add a pci uncore device
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu = NULL;
	int die, ret;

	ret = uncore_pci_get_dev_die_info(pdev, &die);
	if (ret)
		return ret;

	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);

		uncore_extra_pci_dev[die].dev[idx] = pdev;
		pci_set_drvdata(pdev, NULL);
		return 0;
	}

	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];

	/*
	 * Some platforms, e.g. Knights Landing, use a common PCI device ID
	 * for multiple instances of an uncore PMU device type. Check the
	 * PCI slot and function to identify the uncore box.
	 */
	if (id->driver_data & ~0xffff) {
		struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver);

		pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
		if (pmu == NULL)
			return -ENODEV;
	} else {
		/*
		 * For a performance monitoring unit with multiple boxes,
		 * each box has a different function ID.
		 */
		pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
	}

	ret = uncore_pci_pmu_register(pdev, type, pmu, die);

	pci_set_drvdata(pdev, pmu->boxes[die]);

	return ret;
}

/*
 * Unregister the PMU of a PCI device
 * @pmu: The corresponding PMU is unregistered.
 * @die: The die id which the device maps to.
 */
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
{
	struct intel_uncore_box *box = pmu->boxes[die];

	pmu->boxes[die] = NULL;
	if (atomic_dec_return(&pmu->activeboxes) == 0)
		uncore_pmu_unregister(pmu);
	uncore_box_exit(box);
	kfree(box);
}

static void uncore_pci_remove(struct pci_dev *pdev)
{
	struct intel_uncore_box *box;
	struct intel_uncore_pmu *pmu;
	int i, die;

	if (uncore_pci_get_dev_die_info(pdev, &die))
		return;

	box = pci_get_drvdata(pdev);
	if (!box) {
		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
			if (uncore_extra_pci_dev[die].dev[i] == pdev) {
				uncore_extra_pci_dev[die].dev[i] = NULL;
				break;
			}
		}
		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
		return;
	}

	pmu = box->pmu;

	pci_set_drvdata(pdev, NULL);

	uncore_pci_pmu_unregister(pmu, die);
}

static int uncore_bus_notify(struct notifier_block *nb,
			     unsigned long action, void *data,
			     const struct pci_device_id *ids)
{
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct intel_uncore_pmu *pmu;
	int die;

	/* Unregister the PMU when the device is going to be deleted. */
	if (action != BUS_NOTIFY_DEL_DEVICE)
		return NOTIFY_DONE;

	pmu = uncore_pci_find_dev_pmu(pdev, ids);
	if (!pmu)
		return NOTIFY_DONE;

	if (uncore_pci_get_dev_die_info(pdev, &die))
		return NOTIFY_DONE;

	uncore_pci_pmu_unregister(pmu, die);

	return NOTIFY_OK;
}

static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	return uncore_bus_notify(nb, action, data,
				 uncore_pci_sub_driver->id_table);
}

static struct notifier_block uncore_pci_sub_notifier = {
	.notifier_call = uncore_pci_sub_bus_notify,
};

static void uncore_pci_sub_driver_init(void)
{
	const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct pci_dev *pci_sub_dev;
	bool notify = false;
	unsigned int devfn;
	int die;

	while (ids && ids->vendor) {
		pci_sub_dev = NULL;
		type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
		/*
		 * Search the available device, and register the
		 * corresponding PMU.
		 */
		while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
						     ids->device, pci_sub_dev))) {
			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
					  UNCORE_PCI_DEV_FUNC(ids->driver_data));
			if (devfn != pci_sub_dev->devfn)
				continue;

			pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
			if (!pmu)
				continue;

			if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
				continue;

			if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
						     die))
				notify = true;
		}
		ids++;
	}

	if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
		notify = false;

	if (!notify)
		uncore_pci_sub_driver = NULL;
}

static int uncore_pci_bus_notify(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	return uncore_bus_notify(nb, action, data, NULL);
}

static struct notifier_block uncore_pci_notifier = {
	.notifier_call = uncore_pci_bus_notify,
};


static void uncore_pci_pmus_register(void)
{
	struct intel_uncore_type **types = uncore_pci_uncores;
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct pci_dev *pdev;
	u64 box_ctl;
	int i, die;

	for (; *types; types++) {
		type = *types;
		for (die = 0; die < __uncore_max_dies; die++) {
			for (i = 0; i < type->num_boxes; i++) {
				if (!type->box_ctls[die])
					continue;
				box_ctl = type->box_ctls[die] + type->pci_offsets[i];
				pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
								   UNCORE_DISCOVERY_PCI_BUS(box_ctl),
								   UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
				if (!pdev)
					continue;
				pmu = &type->pmus[i];

				uncore_pci_pmu_register(pdev, type, pmu, die);
			}
		}
	}

	bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
}

static int __init uncore_pci_init(void)
{
	size_t size;
	int ret;

	size = uncore_max_dies() * sizeof(struct pci_extra_dev);
	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
	if (!uncore_extra_pci_dev) {
		ret = -ENOMEM;
		goto err;
	}

	ret = uncore_types_init(uncore_pci_uncores, false);
	if (ret)
		goto errtype;

	if (uncore_pci_driver) {
		uncore_pci_driver->probe = uncore_pci_probe;
		uncore_pci_driver->remove = uncore_pci_remove;

		ret = pci_register_driver(uncore_pci_driver);
		if (ret)
			goto errtype;
	} else
		uncore_pci_pmus_register();

	if (uncore_pci_sub_driver)
		uncore_pci_sub_driver_init();

	pcidrv_registered = true;
	return 0;

errtype:
	uncore_types_exit(uncore_pci_uncores);
	kfree(uncore_extra_pci_dev);
	uncore_extra_pci_dev = NULL;
	uncore_free_pcibus_map();
err:
	uncore_pci_uncores = empty_uncore;
	return ret;
}

static void uncore_pci_exit(void)
{
	if (pcidrv_registered) {
		pcidrv_registered = false;
		if (uncore_pci_sub_driver)
			bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
		if (uncore_pci_driver)
			pci_unregister_driver(uncore_pci_driver);
		else
			bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
		uncore_types_exit(uncore_pci_uncores);
		kfree(uncore_extra_pci_dev);
		uncore_free_pcibus_map();
	}
}

static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
				   int new_cpu)
{
	struct intel_uncore_pmu *pmu = type->pmus;
	struct intel_uncore_box *box;
	int i, die;

	die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
	for (i = 0; i < type->num_boxes; i++, pmu++) {
		box = pmu->boxes[die];
		if (!box)
			continue;

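		/* old_cpu < 0: the box has no owner yet, just assign new_cpu. */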
		if (old_cpu < 0) {
			WARN_ON_ONCE(box->cpu != -1);
			box->cpu = new_cpu;
			continue;
		}

		WARN_ON_ONCE(box->cpu != old_cpu);
		box->cpu = -1;
		if (new_cpu < 0)
			continue;

		uncore_pmu_cancel_hrtimer(box);
		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
		box->cpu = new_cpu;
	}
}

static void uncore_change_context(struct intel_uncore_type **uncores,
				  int old_cpu, int new_cpu)
{
	for (; *uncores; uncores++)
		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}

static void uncore_box_unref(struct intel_uncore_type **types, int id)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i;

	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			box = pmu->boxes[id];
			if (box && atomic_dec_return(&box->refcnt) == 0)
				uncore_box_exit(box);
		}
	}
}

static int uncore_event_cpu_offline(unsigned int cpu)
{
	int die, target;

	/* Check if exiting cpu is used for collecting uncore events */
	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
		goto unref;
	/* Find a new cpu to collect uncore events */
	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);

	/* Migrate uncore events to the new target */
	if (target < nr_cpu_ids)
		cpumask_set_cpu(target, &uncore_cpu_mask);
	else
		target = -1;

	uncore_change_context(uncore_msr_uncores, cpu, target);
	uncore_change_context(uncore_mmio_uncores, cpu, target);
	uncore_change_context(uncore_pci_uncores, cpu, target);

unref:
	/* Clear the references */
	die = topology_logical_die_id(cpu);
	uncore_box_unref(uncore_msr_uncores, die);
	uncore_box_unref(uncore_mmio_uncores, die);
	return 0;
}

static int allocate_boxes(struct intel_uncore_type **types,
			 unsigned int die, unsigned int cpu)
{
	struct intel_uncore_box *box, *tmp;
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	LIST_HEAD(allocated);
	int i;

	/* Try to allocate all required boxes */
	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			if (pmu->boxes[die])
				continue;
			box = uncore_alloc_box(type, cpu_to_node(cpu));
			if (!box)
				goto cleanup;
			box->pmu = pmu;
			box->dieid = die;
			list_add(&box->active_list, &allocated);
		}
	}
	/* Install them in the pmus */
	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
		list_del_init(&box->active_list);
		box->pmu->boxes[die] = box;
	}
	return 0;

cleanup:
	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
		list_del_init(&box->active_list);
		kfree(box);
	}
	return -ENOMEM;
}

static int uncore_box_ref(struct intel_uncore_type **types,
			  int id, unsigned int cpu)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, ret;

	ret = allocate_boxes(types, id, cpu);
	if (ret)
		return ret;

	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			box = pmu->boxes[id];
			if (box && atomic_inc_return(&box->refcnt) == 1)
				uncore_box_init(box);
		}
	}
	return 0;
}

static int uncore_event_cpu_online(unsigned int cpu)
{
	int die, target, msr_ret, mmio_ret;

	die = topology_logical_die_id(cpu);
	msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
	mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
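	/* Fail only if both the MSR and MMIO uncore setup failed. */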
	if (msr_ret && mmio_ret)
		return -ENOMEM;

	/*
	 * Check if there is an online cpu in the package
	 * which collects uncore events already.
	 */
	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
	if (target < nr_cpu_ids)
		return 0;

	cpumask_set_cpu(cpu, &uncore_cpu_mask);

	if (!msr_ret)
		uncore_change_context(uncore_msr_uncores, -1, cpu);
	if (!mmio_ret)
		uncore_change_context(uncore_mmio_uncores, -1, cpu);
	uncore_change_context(uncore_pci_uncores, -1, cpu);
	return 0;
}

static int __init type_pmu_register(struct intel_uncore_type *type)
{
	int i, ret;

	for (i = 0; i < type->num_boxes; i++) {
		ret = uncore_pmu_register(&type->pmus[i]);
		if (ret)
			return ret;
	}
	return 0;
}

static int __init uncore_msr_pmus_register(void)
{
	struct intel_uncore_type **types = uncore_msr_uncores;
	int ret;

	for (; *types; types++) {
		ret = type_pmu_register(*types);
		if (ret)
			return ret;
	}
	return 0;
}

static int __init uncore_cpu_init(void)
{
	int ret;

	ret = uncore_types_init(uncore_msr_uncores, true);
	if (ret)
		goto err;

	ret = uncore_msr_pmus_register();
	if (ret)
		goto err;
	return 0;
err:
	uncore_types_exit(uncore_msr_uncores);
	uncore_msr_uncores = empty_uncore;
	return ret;
}

static int __init uncore_mmio_init(void)
{
	struct intel_uncore_type **types = uncore_mmio_uncores;
	int ret;

	ret = uncore_types_init(types, true);
	if (ret)
		goto err;

	for (; *types; types++) {
		ret = type_pmu_register(*types);
		if (ret)
			goto err;
	}
	return 0;
err:
	uncore_types_exit(uncore_mmio_uncores);
	uncore_mmio_uncores = empty_uncore;
	return ret;
}

struct intel_uncore_init_fun {
	void	(*cpu_init)(void);
	int	(*pci_init)(void);
	void	(*mmio_init)(void);
	/* Discovery table is required */
	bool	use_discovery;
	/* The units in the discovery table should be ignored. */
	int	*uncore_units_ignore;
};

static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
	.cpu_init = nhm_uncore_cpu_init,
};

static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
	.cpu_init = snb_uncore_cpu_init,
	.pci_init = snb_uncore_pci_init,
};

static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
	.cpu_init = snb_uncore_cpu_init,
	.pci_init = ivb_uncore_pci_init,
};

static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
	.cpu_init = snb_uncore_cpu_init,
	.pci_init = hsw_uncore_pci_init,
};

static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
	.cpu_init = snb_uncore_cpu_init,
	.pci_init = bdw_uncore_pci_init,
};

static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
	.cpu_init = snbep_uncore_cpu_init,
	.pci_init = snbep_uncore_pci_init,
};

static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
	.cpu_init = nhmex_uncore_cpu_init,
};

static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
	.cpu_init = ivbep_uncore_cpu_init,
	.pci_init = ivbep_uncore_pci_init,
};

static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
	.cpu_init = hswep_uncore_cpu_init,
	.pci_init = hswep_uncore_pci_init,
};

static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
	.cpu_init = bdx_uncore_cpu_init,
	.pci_init = bdx_uncore_pci_init,
};

static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
	.cpu_init = knl_uncore_cpu_init,
	.pci_init = knl_uncore_pci_init,
};

static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
	.cpu_init = skl_uncore_cpu_init,
	.pci_init = skl_uncore_pci_init,
};

static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
	.cpu_init = skx_uncore_cpu_init,
	.pci_init = skx_uncore_pci_init,
};

static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
	.cpu_init = icl_uncore_cpu_init,
	.pci_init = skl_uncore_pci_init,
};

static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
	.cpu_init = tgl_uncore_cpu_init,
	.mmio_init = tgl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
	.cpu_init = tgl_uncore_cpu_init,
	.mmio_init = tgl_l_uncore_mmio_init,
};

static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
	.cpu_init = tgl_uncore_cpu_init,
	.pci_init = skl_uncore_pci_init,
};

static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
	.cpu_init = adl_uncore_cpu_init,
	.mmio_init = adl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun mtl_uncore_init __initconst = {
	.cpu_init = mtl_uncore_cpu_init,
	.mmio_init = adl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
	.cpu_init = icx_uncore_cpu_init,
	.pci_init = icx_uncore_pci_init,
	.mmio_init = icx_uncore_mmio_init,
};

static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
	.cpu_init = snr_uncore_cpu_init,
	.pci_init = snr_uncore_pci_init,
	.mmio_init = snr_uncore_mmio_init,
};

static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
	.cpu_init = spr_uncore_cpu_init,
	.pci_init = spr_uncore_pci_init,
	.mmio_init = spr_uncore_mmio_init,
	.use_discovery = true,
	.uncore_units_ignore = spr_uncore_units_ignore,
};

static const struct intel_uncore_init_fun gnr_uncore_init __initconst = {
	.cpu_init = gnr_uncore_cpu_init,
	.pci_init = gnr_uncore_pci_init,
	.mmio_init = gnr_uncore_mmio_init,
	.use_discovery = true,
	.uncore_units_ignore = gnr_uncore_units_ignore,
};

static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
	.cpu_init = intel_uncore_generic_uncore_cpu_init,
	.pci_init = intel_uncore_generic_uncore_pci_init,
	.mmio_init = intel_uncore_generic_uncore_mmio_init,
};

static const struct x86_cpu_id intel_uncore_match[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&nhm_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&nhm_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&nhm_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&nhm_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&snb_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&ivb_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&hsw_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&hsw_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&hsw_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&bdw_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&bdw_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&snbep_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&nhmex_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&nhmex_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&ivbep_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&hswep_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&bdx_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&bdx_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&knl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&knl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&skl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&skl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&skx_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&skl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&skl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		&skl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		&skl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,	&icl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&icx_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&icx_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		&tgl_l_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&tgl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rkl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&adl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&adl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&adl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,	&adl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE,		&mtl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L,	&mtl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&spr_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&spr_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X,	&gnr_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D,	&gnr_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&adl_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,	&gnr_uncore_init),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT,	&gnr_uncore_init),
	{},
};
MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);

static int __init intel_uncore_init(void)
{
	const struct x86_cpu_id *id;
	struct intel_uncore_init_fun *uncore_init;
	int pret = 0, cret = 0, mret = 0, ret;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	__uncore_max_dies =
		topology_max_packages() * topology_max_dies_per_package();

	id = x86_match_cpu(intel_uncore_match);
	if (!id) {
		if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
			uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
		else
			return -ENODEV;
	} else {
		uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
		if (uncore_no_discover && uncore_init->use_discovery)
			return -ENODEV;
		if (uncore_init->use_discovery &&
		    !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
			return -ENODEV;
	}

	if (uncore_init->pci_init) {
		pret = uncore_init->pci_init();
		if (!pret)
			pret = uncore_pci_init();
	}

	if (uncore_init->cpu_init) {
		uncore_init->cpu_init();
		cret = uncore_cpu_init();
	}

	if (uncore_init->mmio_init) {
		uncore_init->mmio_init();
		mret = uncore_mmio_init();
	}

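	/* Bail out only if the PCI, CPU and MMIO setup all failed. */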
	if (cret && pret && mret) {
		ret = -ENODEV;
		goto free_discovery;
	}

	/* Install hotplug callbacks to setup the targets for each package */
	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
				"perf/x86/intel/uncore:online",
				uncore_event_cpu_online,
				uncore_event_cpu_offline);
	if (ret)
		goto err;
	return 0;

err:
	uncore_types_exit(uncore_msr_uncores);
	uncore_types_exit(uncore_mmio_uncores);
	uncore_pci_exit();
free_discovery:
	intel_uncore_clear_discovery_tables();
	return ret;
}
module_init(intel_uncore_init);

static void __exit intel_uncore_exit(void)
{
	cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
	uncore_types_exit(uncore_msr_uncores);
	uncore_types_exit(uncore_mmio_uncores);
	uncore_pci_exit();
	intel_uncore_clear_discovery_tables();
}
module_exit(intel_uncore_exit);
