1/*-
2 * Copyright (c) 2003-2008 Joseph Koshy
3 * Copyright (c) 2007 The FreeBSD Foundation
4 * All rights reserved.
5 *
6 * Portions of this software were developed by A. Joseph Koshy under
7 * sponsorship from the FreeBSD Foundation and Google, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: stable/11/sys/kern/kern_pmc.c 315285 2017-03-14 20:43:04Z mjg $");
33
34#include "opt_hwpmc_hooks.h"
35
36#include <sys/types.h>
37#include <sys/ctype.h>
38#include <sys/param.h>
39#include <sys/malloc.h>
40#include <sys/kernel.h>
41#include <sys/lock.h>
42#include <sys/mutex.h>
43#include <sys/pmc.h>
44#include <sys/pmckern.h>
45#include <sys/smp.h>
46#include <sys/sysctl.h>
47#include <sys/systm.h>
48
/*
 * PMC_KERNEL_VERSION is the version number this kernel advertises through
 * 'pmc_kernel_version': PMC_VERSION when the kernel is built with
 * HWPMC_HOOKS, and 0 otherwise.
 */
#ifdef	HWPMC_HOOKS
FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
#define	PMC_KERNEL_VERSION	PMC_VERSION
#else
#define	PMC_KERNEL_VERSION	0
#endif

/* Malloc type used for the allocations made in this file. */
MALLOC_DECLARE(M_PMCHOOKS);
MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks");

/* Read-only copy of PMC_KERNEL_VERSION (0 without HWPMC_HOOKS). */
const int pmc_kernel_version = PMC_KERNEL_VERSION;
60
/*
 * Hook variable.  Starts out NULL; a non-NULL value means that the
 * PMC(4) module is present (see the note above 'pmc_sx' below).
 */
int __read_mostly (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;

/* Interrupt handler.  Starts out NULL, like 'pmc_hook'. */
int __read_mostly (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;

/* Bitmask of CPUs requiring servicing at hardclock time. */
volatile cpuset_t pmc_cpumask;

/*
 * A global count of SS mode PMCs.  When non-zero, this means that
 * we have processes that are sampling the system as a whole.
 */
volatile int pmc_ss_count;

/*
 * Since PMC(4) may not be loaded in the current kernel, the
 * convention followed is that a non-NULL value of 'pmc_hook' implies
 * the presence of this kernel module.
 *
 * This requires us to protect 'pmc_hook' with a
 * shared (sx) lock -- thus making the process of calling into PMC(4)
 * somewhat more expensive than a simple 'if' check and indirect call.
 *
 * The lock itself is initialised by pmc_init_sx().
 */
struct sx pmc_sx;

/*
 * PMC Soft per-CPU trapframes: one entry for each of the MAXCPU
 * possible CPUs.
 */
struct trapframe pmc_tf[MAXCPU];
91
/*
 * PMC Soft uses a global table ('pmc_softs', below) to store the
 * registered soft events.
 */

/* Parent node for the kern.hwpmc.* sysctl variables. */
SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");

/*
 * Capacity of the soft event table, exported as kern.hwpmc.softevents
 * (CTLFLAG_RDTUN).  The default is 16; init_hwpmc() range-checks the
 * value before allocating the table.
 */
static int pmc_softevents = 16;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_RDTUN,
    &pmc_softevents, 0, "maximum number of soft events");

/*
 * Soft event table state:
 * - pmc_softs_mtx:   spin mutex held around accesses to the table.
 * - pmc_softs_count: number of table slots handed out so far by
 *                    appending; never decremented in this file.
 * - pmc_softs:       array of 'pmc_softevents' pointers allocated by
 *                    init_hwpmc(); a NULL entry denotes a free slot.
 */
struct mtx pmc_softs_mtx;
int pmc_softs_count;
struct pmc_soft **pmc_softs;

MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN);
107
/*
 * Initialise the 'pmc_sx' lock.  The lock is created with the
 * SX_NOWITNESS flag (see sx(9)).
 */
static void
pmc_init_sx(void)
{
	sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
}

/* Run pmc_init_sx() at boot, during the SI_SUB_LOCK stage. */
SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);
115
116/*
117 * Helper functions.
118 */
119
120/*
121 * A note on the CPU numbering scheme used by the hwpmc(4) driver.
122 *
123 * CPUs are denoted using numbers in the range 0..[pmc_cpu_max()-1].
124 * CPUs could be numbered "sparsely" in this range; the predicate
125 * `pmc_cpu_is_present()' is used to test whether a given CPU is
126 * physically present.
127 *
128 * Further, a CPU that is physically present may be administratively
129 * disabled or otherwise unavailable for use by hwpmc(4).  The
130 * `pmc_cpu_is_active()' predicate tests for CPU usability.  An
131 * "active" CPU participates in thread scheduling and can field
132 * interrupts raised by PMC hardware.
133 *
134 * On systems with hyperthreaded CPUs, multiple logical CPUs may share
135 * PMC hardware resources.  For such processors one logical CPU is
136 * denoted as the primary owner of the in-CPU PMC resources. The
137 * pmc_cpu_is_primary() predicate is used to distinguish this primary
138 * CPU from the others.
139 */
140
/*
 * Test whether CPU 'cpu' is usable by hwpmc(4).
 *
 * On SMP kernels a CPU is active when it is present and is not a member
 * of 'hlt_cpus_mask'.  On uniprocessor kernels the answer is always yes.
 *
 * Returns 1 if the CPU is active, 0 otherwise.
 */
int
pmc_cpu_is_active(int cpu)
{
#ifdef	SMP
	/* The halted-CPU mask is only consulted for CPUs that are present. */
	if (!pmc_cpu_is_present(cpu) || CPU_ISSET(cpu, &hlt_cpus_mask))
		return (0);
#endif
	return (1);
}
151
/*
 * Deprecated: logical negation of pmc_cpu_is_active().
 *
 * Returns 1 if CPU 'cpu' is not active, 0 if it is.
 */
int
pmc_cpu_is_disabled(int cpu)
{
	return (pmc_cpu_is_active(cpu) == 0);
}
158
/*
 * Test whether CPU 'cpu' is physically present.
 *
 * On SMP kernels this is the negation of CPU_ABSENT(); uniprocessor
 * kernels always report the CPU as present.
 *
 * Returns 1 if the CPU is present, 0 otherwise.
 */
int
pmc_cpu_is_present(int cpu)
{
#ifdef	SMP
	if (CPU_ABSENT(cpu))
		return (0);
#endif
	return (1);
}
168
/*
 * Test whether CPU 'cpu' is the primary owner of its PMC resources.
 *
 * On SMP kernels a CPU is primary when it is not a member of
 * 'logical_cpus_mask'; uniprocessor kernels always answer yes.
 *
 * Returns 1 if the CPU is primary, 0 otherwise.
 */
int
pmc_cpu_is_primary(int cpu)
{
#ifdef	SMP
	if (CPU_ISSET(cpu, &logical_cpus_mask))
		return (0);
#endif
	return (1);
}
178
179
/*
 * Return the exclusive upper bound on CPU numbers: 'mp_maxid' + 1 on
 * SMP kernels and 1 on uniprocessor kernels.  Callers use the value to
 * size internal data structures and for runtime range checks.
 */
unsigned int
pmc_cpu_max(void)
{
	unsigned int limit;

#ifdef	SMP
	limit = mp_maxid + 1;
#else
	limit = 1;
#endif
	return (limit);
}
194
195#ifdef	INVARIANTS
196
/*
 * Return the number of CPUs in the `active' state: 'mp_ncpus' on SMP
 * kernels and 1 on uniprocessor kernels.
 */
int
pmc_cpu_max_active(void)
{
	int nactive;

#ifdef	SMP
	/*
	 * Once the kernel supports CPU hot-plugging this should
	 * report the current number of "active" CPUs instead.
	 */
	nactive = mp_ncpus;
#else
	nactive = 1;
#endif
	return (nactive);
}
214
215#endif
216
/*
 * Canonicalise an event name in place:
 * - drop every leading '_';
 * - collapse each run of '_' into a single '_';
 * - drop a trailing '_';
 * - convert the remaining characters to upper case.
 *
 * 'name' must be a writable NUL-terminated string; the result is never
 * longer than the input.
 */
static void
pmc_soft_namecleanup(char *name)
{
	char *src, *dst;
	char next;

	/* Skip over the leading underscores. */
	src = name;
	while (*src == '_')
		src++;

	dst = name;
	while (*src != '\0') {
		next = src[1];
		/*
		 * An underscore survives only when it is followed by a
		 * character that is neither '_' nor the terminator.
		 */
		if (*src != '_' || (next != '_' && next != '\0'))
			*dst++ = toupper(*src);
		src++;
	}
	*dst = '\0';
}
239
240void
241pmc_soft_ev_register(struct pmc_soft *ps)
242{
243	static int warned = 0;
244	int n;
245
246	ps->ps_running  = 0;
247	ps->ps_ev.pm_ev_code = 0; /* invalid */
248	pmc_soft_namecleanup(ps->ps_ev.pm_ev_name);
249
250	mtx_lock_spin(&pmc_softs_mtx);
251
252	if (pmc_softs_count >= pmc_softevents) {
253		/*
254		 * XXX Reusing events can enter a race condition where
255		 * new allocated event will be used as an old one.
256		 */
257		for (n = 0; n < pmc_softevents; n++)
258			if (pmc_softs[n] == NULL)
259				break;
260		if (n == pmc_softevents) {
261			mtx_unlock_spin(&pmc_softs_mtx);
262			if (!warned) {
263				printf("hwpmc: too many soft events, "
264				    "increase kern.hwpmc.softevents tunable\n");
265				warned = 1;
266			}
267			return;
268		}
269
270		ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + n;
271		pmc_softs[n] = ps;
272	} else {
273		ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + pmc_softs_count;
274		pmc_softs[pmc_softs_count++] = ps;
275	}
276
277	mtx_unlock_spin(&pmc_softs_mtx);
278}
279
280void
281pmc_soft_ev_deregister(struct pmc_soft *ps)
282{
283
284	KASSERT(ps != NULL, ("pmc_soft_deregister: called with NULL"));
285
286	mtx_lock_spin(&pmc_softs_mtx);
287
288	if (ps->ps_ev.pm_ev_code != 0 &&
289	    (ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST) < pmc_softevents) {
290		KASSERT(ps->ps_ev.pm_ev_code >= PMC_EV_SOFT_FIRST &&
291		    ps->ps_ev.pm_ev_code <= PMC_EV_SOFT_LAST,
292		    ("pmc_soft_deregister: invalid event value"));
293		pmc_softs[ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST] = NULL;
294	}
295
296	mtx_unlock_spin(&pmc_softs_mtx);
297}
298
299struct pmc_soft *
300pmc_soft_ev_acquire(enum pmc_event ev)
301{
302	struct pmc_soft *ps;
303
304	if (ev == 0 || (ev - PMC_EV_SOFT_FIRST) >= pmc_softevents)
305		return NULL;
306
307	KASSERT(ev >= PMC_EV_SOFT_FIRST &&
308	    ev <= PMC_EV_SOFT_LAST,
309	    ("event out of range"));
310
311	mtx_lock_spin(&pmc_softs_mtx);
312
313	ps = pmc_softs[ev - PMC_EV_SOFT_FIRST];
314	if (ps == NULL)
315		mtx_unlock_spin(&pmc_softs_mtx);
316
317	return ps;
318}
319
/*
 * Release a soft event obtained from pmc_soft_ev_acquire().
 *
 * This drops 'pmc_softs_mtx', which pmc_soft_ev_acquire() leaves held
 * when it returns a non-NULL event.  The 'ps' argument itself is not
 * used.
 */
void
pmc_soft_ev_release(struct pmc_soft *ps)
{

	mtx_unlock_spin(&pmc_softs_mtx);
}
326
327/*
328 *  Initialise hwpmc.
329 */
330static void
331init_hwpmc(void *dummy __unused)
332{
333	if (pmc_softevents <= 0 ||
334	    pmc_softevents > PMC_EV_DYN_COUNT) {
335		(void) printf("hwpmc: tunable \"softevents\"=%d out of "
336		    "range.\n", pmc_softevents);
337		pmc_softevents = PMC_EV_DYN_COUNT;
338	}
339	pmc_softs = malloc(pmc_softevents * sizeof(struct pmc_soft *), M_PMCHOOKS, M_NOWAIT|M_ZERO);
340	KASSERT(pmc_softs != NULL, ("cannot allocate soft events table"));
341}
342
343SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL);
344
345