/*-
 * Copyright (c) 2003-2008 Joseph Koshy
 * Copyright (c) 2007 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_pmc.c 315285 2017-03-14 20:43:04Z mjg $");

#include "opt_hwpmc_hooks.h"

#include <sys/types.h>
#include <sys/ctype.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef	HWPMC_HOOKS
FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
#define	PMC_KERNEL_VERSION	PMC_VERSION
#else
#define	PMC_KERNEL_VERSION	0
#endif

MALLOC_DECLARE(M_PMCHOOKS);
MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks");

const int pmc_kernel_version = PMC_KERNEL_VERSION;

/* Hook variable. */
int __read_mostly (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;

/* Interrupt handler */
int __read_mostly (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;

/* Bitmask of CPUs requiring servicing at hardclock time */
volatile cpuset_t pmc_cpumask;

/*
 * A global count of SS mode PMCs.  When non-zero, this means that
 * we have processes that are sampling the system as a whole.
 */
volatile int pmc_ss_count;

/*
 * Since PMC(4) may not be loaded in the current kernel, the
 * convention followed is that a non-NULL value of 'pmc_hook' implies
 * the presence of this kernel module.
 *
 * This requires us to protect 'pmc_hook' with a shared (sx) lock --
 * thus making the process of calling into PMC(4) somewhat more
 * expensive than a simple 'if' check and indirect call.
 */
struct sx pmc_sx;
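
/*
 * Illustrative sketch of the caller-side convention (the real hook
 * invocation macros live in <sys/pmckern.h>; the snippet below only
 * shows the shape of the check and is not compiled here):
 *
 *	sx_slock(&pmc_sx);
 *	if (pmc_hook != NULL)
 *		(void) (*pmc_hook)(curthread, function, arg);
 *	sx_sunlock(&pmc_sx);
 *
 * The shared lock is held across both the NULL test and the indirect
 * call, so hwpmc(4) cannot be unloaded between the two.
 */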

/*
 * Per-CPU trap frames used by soft PMCs.
 */
struct trapframe pmc_tf[MAXCPU];

/*
 * Soft PMCs use a global table to store registered events.
 */

SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");

static int pmc_softevents = 16;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_RDTUN,
    &pmc_softevents, 0, "maximum number of soft events");
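
/*
 * Since the sysctl is CTLFLAG_RDTUN, the limit can only be changed as
 * a boot-time tunable, e.g. by setting kern.hwpmc.softevents=32 in
 * loader.conf (the value 32 is only an example).
 */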

struct mtx pmc_softs_mtx;
int pmc_softs_count;
struct pmc_soft **pmc_softs;

MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN);

static void
pmc_init_sx(void)
{
	sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
}

SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);

/*
 * Helper functions.
 */

/*
 * A note on the CPU numbering scheme used by the hwpmc(4) driver.
 *
 * CPUs are denoted using numbers in the range 0..[pmc_cpu_max()-1].
 * CPUs could be numbered "sparsely" in this range; the predicate
 * `pmc_cpu_is_present()' is used to test whether a given CPU is
 * physically present.
 *
 * Further, a CPU that is physically present may be administratively
 * disabled or otherwise unavailable for use by hwpmc(4).  The
 * `pmc_cpu_is_active()' predicate tests for CPU usability.  An
 * "active" CPU participates in thread scheduling and can field
 * interrupts raised by PMC hardware.
 *
 * On systems with hyperthreaded CPUs, multiple logical CPUs may share
 * PMC hardware resources.  For such processors one logical CPU is
 * denoted as the primary owner of the in-CPU PMC resources.  The
 * pmc_cpu_is_primary() predicate is used to distinguish this primary
 * CPU from the others.
 */

int
pmc_cpu_is_active(int cpu)
{
#ifdef	SMP
	return (pmc_cpu_is_present(cpu) &&
	    !CPU_ISSET(cpu, &hlt_cpus_mask));
#else
	return (1);
#endif
}

/* Deprecated. */
int
pmc_cpu_is_disabled(int cpu)
{
	return (!pmc_cpu_is_active(cpu));
}

int
pmc_cpu_is_present(int cpu)
{
#ifdef	SMP
	return (!CPU_ABSENT(cpu));
#else
	return (1);
#endif
}

int
pmc_cpu_is_primary(int cpu)
{
#ifdef	SMP
	return (!CPU_ISSET(cpu, &logical_cpus_mask));
#else
	return (1);
#endif
}

/*
 * Return the maximum CPU number supported by the system.  The return
 * value is used for scaling internal data structures and for runtime
 * checks.
 */
unsigned int
pmc_cpu_max(void)
{
#ifdef	SMP
	return (mp_maxid + 1);
#else
	return (1);
#endif
}
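
/*
 * Illustrative sketch (not part of this file) of how callers combine
 * pmc_cpu_max() with the predicates above when walking CPUs, given
 * that CPU numbers may be sparse:
 *
 *	unsigned int cpu;
 *
 *	for (cpu = 0; cpu < pmc_cpu_max(); cpu++) {
 *		if (!pmc_cpu_is_active(cpu))
 *			continue;
 *		... per-CPU work ...
 *	}
 */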

#ifdef	INVARIANTS

/*
 * Return the count of CPUs in the `active' state in the system.
 */
int
pmc_cpu_max_active(void)
{
#ifdef	SMP
	/*
	 * When support for CPU hot-plugging is added to the kernel,
	 * this function would change to return the current number
	 * of "active" CPUs.
	 */
	return (mp_ncpus);
#else
	return (1);
#endif
}

#endif

/*
 * Clean up an event name:
 * - strip leading and trailing '_' and collapse repeated '_' into one
 * - convert to upper case
 */
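/*
 * For example (illustrative input only), the name "__my__event_" is
 * rewritten in place as "MY_EVENT".
 */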
static void
pmc_soft_namecleanup(char *name)
{
	char *p, *q;

	p = q = name;

	for ( ; *p == '_' ; p++)
		;
	for ( ; *p ; p++) {
		if (*p == '_' && (*(p + 1) == '_' || *(p + 1) == '\0'))
			continue;
		else
			*q++ = toupper(*p);
	}
	*q = '\0';
}

void
pmc_soft_ev_register(struct pmc_soft *ps)
{
	static int warned = 0;
	int n;

	ps->ps_running = 0;
	ps->ps_ev.pm_ev_code = 0; /* invalid */
	pmc_soft_namecleanup(ps->ps_ev.pm_ev_name);

	mtx_lock_spin(&pmc_softs_mtx);

	if (pmc_softs_count >= pmc_softevents) {
		/*
		 * XXX Reusing event codes is racy: a newly allocated
		 * event may be mistaken for the old event that last
		 * held the same code.
		 */
		for (n = 0; n < pmc_softevents; n++)
			if (pmc_softs[n] == NULL)
				break;
		if (n == pmc_softevents) {
			mtx_unlock_spin(&pmc_softs_mtx);
			if (!warned) {
				printf("hwpmc: too many soft events, "
				    "increase kern.hwpmc.softevents tunable\n");
				warned = 1;
			}
			return;
		}

		ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + n;
		pmc_softs[n] = ps;
	} else {
		ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + pmc_softs_count;
		pmc_softs[pmc_softs_count++] = ps;
	}

	mtx_unlock_spin(&pmc_softs_mtx);
}

void
pmc_soft_ev_deregister(struct pmc_soft *ps)
{

	KASSERT(ps != NULL, ("pmc_soft_deregister: called with NULL"));

	mtx_lock_spin(&pmc_softs_mtx);

	if (ps->ps_ev.pm_ev_code != 0 &&
	    (ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST) < pmc_softevents) {
		KASSERT(ps->ps_ev.pm_ev_code >= PMC_EV_SOFT_FIRST &&
		    ps->ps_ev.pm_ev_code <= PMC_EV_SOFT_LAST,
		    ("pmc_soft_deregister: invalid event value"));
		pmc_softs[ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST] = NULL;
	}

	mtx_unlock_spin(&pmc_softs_mtx);
}

struct pmc_soft *
pmc_soft_ev_acquire(enum pmc_event ev)
{
	struct pmc_soft *ps;

	if (ev == 0 || (ev - PMC_EV_SOFT_FIRST) >= pmc_softevents)
		return (NULL);

	KASSERT(ev >= PMC_EV_SOFT_FIRST &&
	    ev <= PMC_EV_SOFT_LAST,
	    ("event out of range"));

	mtx_lock_spin(&pmc_softs_mtx);

	ps = pmc_softs[ev - PMC_EV_SOFT_FIRST];
	if (ps == NULL)
		mtx_unlock_spin(&pmc_softs_mtx);

	return (ps);
}

void
pmc_soft_ev_release(struct pmc_soft *ps)
{

	mtx_unlock_spin(&pmc_softs_mtx);
}
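
/*
 * Illustrative sketch of the acquire/release protocol (consumers are
 * expected to go through the helper macros in <sys/pmckern.h>; the
 * snippet below only shows the locking shape and is not compiled):
 *
 *	struct pmc_soft *ps;
 *
 *	ps = pmc_soft_ev_acquire(ev);
 *	if (ps != NULL) {
 *		if (ps->ps_running)
 *			... record the event ...
 *		pmc_soft_ev_release(ps);
 *	}
 *
 * pmc_soft_ev_acquire() returns with pmc_softs_mtx held when it finds
 * a registered event, and pmc_soft_ev_release() drops that lock.
 */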

/*
 * Initialise hwpmc.
 */
static void
init_hwpmc(void *dummy __unused)
{
	if (pmc_softevents <= 0 ||
	    pmc_softevents > PMC_EV_DYN_COUNT) {
		(void) printf("hwpmc: tunable \"softevents\"=%d out of "
		    "range.\n", pmc_softevents);
		pmc_softevents = PMC_EV_DYN_COUNT;
	}
	pmc_softs = malloc(pmc_softevents * sizeof(struct pmc_soft *),
	    M_PMCHOOKS, M_NOWAIT | M_ZERO);
	KASSERT(pmc_softs != NULL, ("cannot allocate soft events table"));
}

SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL);