hwpmc_mod.c (225617) hwpmc_mod.c (226514)
1/*-
2 * Copyright (c) 2003-2008 Joseph Koshy
3 * Copyright (c) 2007 The FreeBSD Foundation
4 * All rights reserved.
5 *
6 * Portions of this software were developed by A. Joseph Koshy under
7 * sponsorship from the FreeBSD Foundation and Google, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/dev/hwpmc/hwpmc_mod.c 225617 2011-09-16 13:58:51Z kmacy $");
33__FBSDID("$FreeBSD: head/sys/dev/hwpmc/hwpmc_mod.c 226514 2011-10-18 15:25:43Z fabient $");
34
35#include <sys/param.h>
36#include <sys/eventhandler.h>
37#include <sys/jail.h>
38#include <sys/kernel.h>
39#include <sys/kthread.h>
40#include <sys/limits.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/module.h>
44#include <sys/mount.h>
45#include <sys/mutex.h>
46#include <sys/pmc.h>
47#include <sys/pmckern.h>
48#include <sys/pmclog.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/queue.h>
52#include <sys/resourcevar.h>
53#include <sys/sched.h>
54#include <sys/signalvar.h>
55#include <sys/smp.h>
56#include <sys/sx.h>
57#include <sys/sysctl.h>
58#include <sys/sysent.h>
59#include <sys/systm.h>
60#include <sys/vnode.h>
61
62#include <sys/linker.h> /* needs to be after <sys/malloc.h> */
63
64#include <machine/atomic.h>
65#include <machine/md_var.h>
66
67#include <vm/vm.h>
68#include <vm/vm_extern.h>
69#include <vm/pmap.h>
70#include <vm/vm_map.h>
71#include <vm/vm_object.h>
72
73/*
74 * Types
75 */
76
77enum pmc_flags {
78 PMC_FLAG_NONE = 0x00, /* do nothing */
79 PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */
80 PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */
81};
82
83/*
84 * The offset in sysent where the syscall is allocated.
85 */
86
87static int pmc_syscall_num = NO_SYSCALL;
88struct pmc_cpu **pmc_pcpu; /* per-cpu state */
89pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */
90
91#define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)]
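/*
 * Illustrative sketch (not part of the driver): pmc_pcpu_saved is laid
 * out as one contiguous group of 'md->pmd_npmc' slots per CPU.  For
 * example, with md->pmd_npmc == 4, the saved value for row index 2 on
 * CPU 1 is PMC_PCPU_SAVED(1,2) == pmc_pcpu_saved[2 + 4*1] ==
 * pmc_pcpu_saved[6].
 */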
92
93struct mtx_pool *pmc_mtxpool;
94static int *pmc_pmcdisp; /* PMC row dispositions */
95
96#define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0)
97#define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0)
98#define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0)
99
100#define PMC_MARK_ROW_FREE(R) do { \
101 pmc_pmcdisp[(R)] = 0; \
102} while (0)
103
104#define PMC_MARK_ROW_STANDALONE(R) do { \
105 KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
106 __LINE__)); \
107 atomic_add_int(&pmc_pmcdisp[(R)], -1); \
108 KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \
109 ("[pmc,%d] row disposition error", __LINE__)); \
110} while (0)
111
112#define PMC_UNMARK_ROW_STANDALONE(R) do { \
113 atomic_add_int(&pmc_pmcdisp[(R)], 1); \
114 KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
115 __LINE__)); \
116} while (0)
117
118#define PMC_MARK_ROW_THREAD(R) do { \
119 KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
120 __LINE__)); \
121 atomic_add_int(&pmc_pmcdisp[(R)], 1); \
122} while (0)
123
124#define PMC_UNMARK_ROW_THREAD(R) do { \
125 atomic_add_int(&pmc_pmcdisp[(R)], -1); \
126 KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
127 __LINE__)); \
128} while (0)
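/*
 * Illustrative sketch (not part of the driver): pmc_pmcdisp[] encodes
 * how each hardware row is currently in use -- zero means free, a
 * positive count means process-virtual (thread) use and a negative
 * count means system-wide (standalone) use.  A hypothetical helper
 * built only from the macros above could report this as follows.
 */
#if 0
static const char *
pmc_row_disposition_name(int ri)
{
	if (PMC_ROW_DISP_IS_FREE(ri))
		return ("free");		/* pmc_pmcdisp[ri] == 0 */
	if (PMC_ROW_DISP_IS_THREAD(ri))
		return ("thread");		/* pmc_pmcdisp[ri] > 0 */
	return ("standalone");			/* pmc_pmcdisp[ri] < 0 */
}
#endif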
129
130
131/* various event handlers */
132static eventhandler_tag pmc_exit_tag, pmc_fork_tag;
133
134/* Module statistics */
135struct pmc_op_getdriverstats pmc_stats;
136
137/* Machine/processor dependent operations */
138static struct pmc_mdep *md;
139
140/*
 141 * Hash tables mapping owner and target processes to PMCs.
142 */
143
144struct mtx pmc_processhash_mtx; /* spin mutex */
145static u_long pmc_processhashmask;
146static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash;
147
148/*
149 * Hash table of PMC owner descriptors. This table is protected by
150 * the shared PMC "sx" lock.
151 */
152
153static u_long pmc_ownerhashmask;
154static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash;
155
156/*
157 * List of PMC owners with system-wide sampling PMCs.
158 */
159
160static LIST_HEAD(, pmc_owner) pmc_ss_owners;
161
162
163/*
164 * A map of row indices to classdep structures.
165 */
166static struct pmc_classdep **pmc_rowindex_to_classdep;
167
168/*
169 * Prototypes
170 */
171
172#ifdef DEBUG
173static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS);
174static int pmc_debugflags_parse(char *newstr, char *fence);
175#endif
176
177static int load(struct module *module, int cmd, void *arg);
178static int pmc_attach_process(struct proc *p, struct pmc *pm);
179static struct pmc *pmc_allocate_pmc_descriptor(void);
180static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p);
181static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
182static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
183 int cpu);
184static int pmc_can_attach(struct pmc *pm, struct proc *p);
185static void pmc_capture_user_callchain(int cpu, struct trapframe *tf);
186static void pmc_cleanup(void);
187static int pmc_detach_process(struct proc *p, struct pmc *pm);
188static int pmc_detach_one_process(struct proc *p, struct pmc *pm,
189 int flags);
190static void pmc_destroy_owner_descriptor(struct pmc_owner *po);
191static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
192static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
193static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
194 pmc_id_t pmc);
195static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
196 uint32_t mode);
197static void pmc_force_context_switch(void);
198static void pmc_link_target_process(struct pmc *pm,
199 struct pmc_process *pp);
200static void pmc_log_all_process_mappings(struct pmc_owner *po);
201static void pmc_log_kernel_mappings(struct pmc *pm);
202static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p);
203static void pmc_maybe_remove_owner(struct pmc_owner *po);
204static void pmc_process_csw_in(struct thread *td);
205static void pmc_process_csw_out(struct thread *td);
206static void pmc_process_exit(void *arg, struct proc *p);
207static void pmc_process_fork(void *arg, struct proc *p1,
208 struct proc *p2, int n);
209static void pmc_process_samples(int cpu);
210static void pmc_release_pmc_descriptor(struct pmc *pmc);
211static void pmc_remove_owner(struct pmc_owner *po);
212static void pmc_remove_process_descriptor(struct pmc_process *pp);
213static void pmc_restore_cpu_binding(struct pmc_binding *pb);
214static void pmc_save_cpu_binding(struct pmc_binding *pb);
215static void pmc_select_cpu(int cpu);
216static int pmc_start(struct pmc *pm);
217static int pmc_stop(struct pmc *pm);
218static int pmc_syscall_handler(struct thread *td, void *syscall_args);
219static void pmc_unlink_target_process(struct pmc *pmc,
220 struct pmc_process *pp);
221
222/*
223 * Kernel tunables and sysctl(8) interface.
224 */
225
226SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");
227
228static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
229TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "callchaindepth", &pmc_callchaindepth);
230SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_TUN|CTLFLAG_RD,
231 &pmc_callchaindepth, 0, "depth of call chain records");
232
233#ifdef DEBUG
234struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
235char pmc_debugstr[PMC_DEBUG_STRSIZE];
236TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr,
237 sizeof(pmc_debugstr));
238SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
239 CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_TUN,
240 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags");
241#endif
242
243/*
 244 * kern.hwpmc.hashsize -- determines the number of rows in the
 245 * hash tables used to look up target and owner processes
246 */
247
248static int pmc_hashsize = PMC_HASH_SIZE;
249TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "hashsize", &pmc_hashsize);
250SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD,
251 &pmc_hashsize, 0, "rows in hash tables");
252
253/*
 254 * kern.hwpmc.nsamples -- number of PC samples/callchain stacks per CPU
255 */
256
257static int pmc_nsamples = PMC_NSAMPLES;
258TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples);
259SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD,
260 &pmc_nsamples, 0, "number of PC samples per CPU");
261
262
263/*
264 * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
265 */
266
267static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE;
268TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "mtxpoolsize", &pmc_mtxpool_size);
269SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_TUN|CTLFLAG_RD,
270 &pmc_mtxpool_size, 0, "size of spin mutex pool");
271
272
273/*
274 * security.bsd.unprivileged_syspmcs -- allow non-root processes to
275 * allocate system-wide PMCs.
276 *
277 * Allowing unprivileged processes to allocate system PMCs is convenient
278 * if system-wide measurements need to be taken concurrently with other
279 * per-process measurements. This feature is turned off by default.
280 */
281
282static int pmc_unprivileged_syspmcs = 0;
283TUNABLE_INT("security.bsd.unprivileged_syspmcs", &pmc_unprivileged_syspmcs);
284SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RW,
285 &pmc_unprivileged_syspmcs, 0,
286 "allow unprivileged process to allocate system PMCs");
287
288/*
289 * Hash function. Discard the lower 2 bits of the pointer since
290 * these are always zero for our uses. The hash multiplier is
291 * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
292 */
293
294#if LONG_BIT == 64
295#define _PMC_HM 11400714819323198486u
296#elif LONG_BIT == 32
297#define _PMC_HM 2654435769u
298#else
299#error Must know the size of 'long' to compile
300#endif
301
302#define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M))
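/*
 * Illustrative sketch (not part of the driver): the hash tables declared
 * above are sized with hashinit(9), which rounds the requested size up to
 * a power of two and hands back a mask of (nbuckets - 1), so a bucket for
 * a 'struct proc' pointer is selected as shown.  Variable names are
 * hypothetical.
 */
#if 0
	struct proc *p;			/* process being looked up */
	struct pmc_processhash *bucket;

	bucket = &pmc_processhash[PMC_HASH_PTR(p, pmc_processhashmask)];
#endif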
303
304/*
305 * Syscall structures
306 */
307
308/* The `sysent' for the new syscall */
309static struct sysent pmc_sysent = {
310 2, /* sy_narg */
311 pmc_syscall_handler /* sy_call */
312};
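/*
 * Illustrative sketch (not part of the driver): the syscall slot is
 * allocated dynamically, so user space (e.g. libpmc) discovers the number
 * at run time and, with sy_narg == 2 above, would enter the driver roughly
 * as below.  The names in this fragment are hypothetical.
 */
#if 0
	/* hypothetical user-space fragment, not kernel code */
	int error;
	int pmc_syscall;	/* dynamically assigned syscall number */
	int op;			/* a PMC_OP_* request code */
	void *args;		/* request-specific argument structure */

	error = syscall(pmc_syscall, op, args);
#endif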
313
314static struct syscall_module_data pmc_syscall_mod = {
315 load,
316 NULL,
317 &pmc_syscall_num,
318 &pmc_sysent,
319 { 0, NULL }
320};
321
322static moduledata_t pmc_mod = {
323 PMC_MODULE_NAME,
324 syscall_module_handler,
325 &pmc_syscall_mod
326};
327
328DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
329MODULE_VERSION(pmc, PMC_VERSION);
330
331#ifdef DEBUG
332enum pmc_dbgparse_state {
333 PMCDS_WS, /* in whitespace */
334 PMCDS_MAJOR, /* seen a major keyword */
335 PMCDS_MINOR
336};
337
338static int
339pmc_debugflags_parse(char *newstr, char *fence)
340{
341 char c, *p, *q;
342 struct pmc_debugflags *tmpflags;
343 int error, found, *newbits, tmp;
344 size_t kwlen;
345
346 tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO);
347
348 p = newstr;
349 error = 0;
350
351 for (; p < fence && (c = *p); p++) {
352
353 /* skip white space */
354 if (c == ' ' || c == '\t')
355 continue;
356
357 /* look for a keyword followed by "=" */
358 for (q = p; p < fence && (c = *p) && c != '='; p++)
359 ;
360 if (c != '=') {
361 error = EINVAL;
362 goto done;
363 }
364
365 kwlen = p - q;
366 newbits = NULL;
367
368 /* lookup flag group name */
369#define DBG_SET_FLAG_MAJ(S,F) \
370 if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \
371 newbits = &tmpflags->pdb_ ## F;
372
373 DBG_SET_FLAG_MAJ("cpu", CPU);
374 DBG_SET_FLAG_MAJ("csw", CSW);
375 DBG_SET_FLAG_MAJ("logging", LOG);
376 DBG_SET_FLAG_MAJ("module", MOD);
377 DBG_SET_FLAG_MAJ("md", MDP);
378 DBG_SET_FLAG_MAJ("owner", OWN);
379 DBG_SET_FLAG_MAJ("pmc", PMC);
380 DBG_SET_FLAG_MAJ("process", PRC);
381 DBG_SET_FLAG_MAJ("sampling", SAM);
382
383 if (newbits == NULL) {
384 error = EINVAL;
385 goto done;
386 }
387
388 p++; /* skip the '=' */
389
390 /* Now parse the individual flags */
391 tmp = 0;
392 newflag:
393 for (q = p; p < fence && (c = *p); p++)
394 if (c == ' ' || c == '\t' || c == ',')
395 break;
396
397 /* p == fence or c == ws or c == "," or c == 0 */
398
399 if ((kwlen = p - q) == 0) {
400 *newbits = tmp;
401 continue;
402 }
403
404 found = 0;
405#define DBG_SET_FLAG_MIN(S,F) \
406 if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \
407 tmp |= found = (1 << PMC_DEBUG_MIN_ ## F)
408
409 /* a '*' denotes all possible flags in the group */
410 if (kwlen == 1 && *q == '*')
411 tmp = found = ~0;
412 /* look for individual flag names */
413 DBG_SET_FLAG_MIN("allocaterow", ALR);
414 DBG_SET_FLAG_MIN("allocate", ALL);
415 DBG_SET_FLAG_MIN("attach", ATT);
416 DBG_SET_FLAG_MIN("bind", BND);
417 DBG_SET_FLAG_MIN("config", CFG);
418 DBG_SET_FLAG_MIN("exec", EXC);
419 DBG_SET_FLAG_MIN("exit", EXT);
420 DBG_SET_FLAG_MIN("find", FND);
421 DBG_SET_FLAG_MIN("flush", FLS);
422 DBG_SET_FLAG_MIN("fork", FRK);
423 DBG_SET_FLAG_MIN("getbuf", GTB);
424 DBG_SET_FLAG_MIN("hook", PMH);
425 DBG_SET_FLAG_MIN("init", INI);
426 DBG_SET_FLAG_MIN("intr", INT);
427 DBG_SET_FLAG_MIN("linktarget", TLK);
428 DBG_SET_FLAG_MIN("mayberemove", OMR);
429 DBG_SET_FLAG_MIN("ops", OPS);
430 DBG_SET_FLAG_MIN("read", REA);
431 DBG_SET_FLAG_MIN("register", REG);
432 DBG_SET_FLAG_MIN("release", REL);
433 DBG_SET_FLAG_MIN("remove", ORM);
434 DBG_SET_FLAG_MIN("sample", SAM);
435 DBG_SET_FLAG_MIN("scheduleio", SIO);
436 DBG_SET_FLAG_MIN("select", SEL);
437 DBG_SET_FLAG_MIN("signal", SIG);
438 DBG_SET_FLAG_MIN("swi", SWI);
439 DBG_SET_FLAG_MIN("swo", SWO);
440 DBG_SET_FLAG_MIN("start", STA);
441 DBG_SET_FLAG_MIN("stop", STO);
442 DBG_SET_FLAG_MIN("syscall", PMS);
443 DBG_SET_FLAG_MIN("unlinktarget", TUL);
444 DBG_SET_FLAG_MIN("write", WRI);
445 if (found == 0) {
446 /* unrecognized flag name */
447 error = EINVAL;
448 goto done;
449 }
450
451 if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */
452 *newbits = tmp;
453 continue;
454 }
455
456 p++;
457 goto newflag;
458 }
459
460 /* save the new flag set */
461 bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags));
462
463 done:
464 free(tmpflags, M_PMC);
465 return error;
466}
467
468static int
469pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS)
470{
471 char *fence, *newstr;
472 int error;
473 unsigned int n;
474
475 (void) arg1; (void) arg2; /* unused parameters */
476
477 n = sizeof(pmc_debugstr);
478 newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO);
479 (void) strlcpy(newstr, pmc_debugstr, n);
480
481 error = sysctl_handle_string(oidp, newstr, n, req);
482
483 /* if there is a new string, parse and copy it */
484 if (error == 0 && req->newptr != NULL) {
485 fence = newstr + (n < req->newlen ? n : req->newlen + 1);
486 if ((error = pmc_debugflags_parse(newstr, fence)) == 0)
487 (void) strlcpy(pmc_debugstr, newstr,
488 sizeof(pmc_debugstr));
489 }
490
491 free(newstr, M_PMC);
492
493 return error;
494}
495#endif
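/*
 * Illustrative usage sketch (not part of the driver): the string parsed
 * above is a whitespace separated list of "<group>=<flag>[,<flag>...]"
 * items, where a '*' selects every flag in a group.  On a kernel built
 * with DEBUG one might, for example, set:
 *
 *	sysctl kern.hwpmc.debugflags="cpu=bind,select process=attach,exit"
 *	sysctl kern.hwpmc.debugflags="sampling=*"
 *
 * The group and flag names correspond to the DBG_SET_FLAG_MAJ() and
 * DBG_SET_FLAG_MIN() entries in pmc_debugflags_parse() above.
 */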
496
497/*
 498 * Map a row index to its classdep structure, returning in '*adjri'
 499 * the row index adjusted to be relative to that PMC class.
500 */
501static struct pmc_classdep *
502pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri)
503{
504 struct pmc_classdep *pcd;
505
506 (void) md;
507
508 KASSERT(ri >= 0 && ri < md->pmd_npmc,
509 ("[pmc,%d] illegal row-index %d", __LINE__, ri));
510
511 pcd = pmc_rowindex_to_classdep[ri];
512
513 KASSERT(pcd != NULL,
514 ("[pmc,%d] ri %d null pcd", __LINE__, ri));
515
516 *adjri = ri - pcd->pcd_ri;
517
518 KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num,
519 ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri));
520
521 return (pcd);
522}
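/*
 * Illustrative worked example (not part of the driver): suppose a
 * hypothetical MD layer registers two classes, the first contributing
 * pcd_num = 2 rows starting at pcd_ri = 0 and the second contributing
 * pcd_num = 4 rows starting at pcd_ri = 2.  Global row index ri = 3 then
 * maps to the second classdep with *adjri = 3 - 2 = 1, i.e. the second
 * PMC within that class.
 */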
523
524/*
525 * Concurrency Control
526 *
527 * The driver manages the following data structures:
528 *
529 * - target process descriptors, one per target process
530 * - owner process descriptors (and attached lists), one per owner process
531 * - lookup hash tables for owner and target processes
532 * - PMC descriptors (and attached lists)
533 * - per-cpu hardware state
534 * - the 'hook' variable through which the kernel calls into
535 * this module
536 * - the machine hardware state (managed by the MD layer)
537 *
538 * These data structures are accessed from:
539 *
540 * - thread context-switch code
541 * - interrupt handlers (possibly on multiple cpus)
542 * - kernel threads on multiple cpus running on behalf of user
543 * processes doing system calls
544 * - this driver's private kernel threads
545 *
546 * = Locks and Locking strategy =
547 *
548 * The driver uses four locking strategies for its operation:
549 *
550 * - The global SX lock "pmc_sx" is used to protect internal
551 * data structures.
552 *
553 * Calls into the module by syscall() start with this lock being
554 * held in exclusive mode. Depending on the requested operation,
555 * the lock may be downgraded to 'shared' mode to allow more
556 * concurrent readers into the module. Calls into the module from
557 * other parts of the kernel acquire the lock in shared mode.
558 *
559 * This SX lock is held in exclusive mode for any operations that
560 * modify the linkages between the driver's internal data structures.
561 *
562 * The 'pmc_hook' function pointer is also protected by this lock.
563 * It is only examined with the sx lock held in exclusive mode. The
564 * kernel module is allowed to be unloaded only with the sx lock held
565 * in exclusive mode. In normal syscall handling, after acquiring the
566 * pmc_sx lock we first check that 'pmc_hook' is non-null before
567 * proceeding. This prevents races between the thread unloading the module
568 * and other threads seeking to use the module.
569 *
570 * - Lookups of target process structures and owner process structures
571 * cannot use the global "pmc_sx" SX lock because these lookups need
572 * to happen during context switches and in other critical sections
573 * where sleeping is not allowed. We protect these lookup tables
574 * with their own private spin-mutexes, "pmc_processhash_mtx" and
575 * "pmc_ownerhash_mtx".
576 *
577 * - Interrupt handlers work in a lock free manner. At interrupt
578 * time, handlers look at the PMC pointer (phw->phw_pmc) configured
579 * when the PMC was started. If this pointer is NULL, the interrupt
580 * is ignored after updating driver statistics. We ensure that this
581 * pointer is set (using an atomic operation if necessary) before the
582 * PMC hardware is started. Conversely, this pointer is unset atomically
583 * only after the PMC hardware is stopped.
584 *
585 * We ensure that everything needed for the operation of an
586 * interrupt handler is available without it needing to acquire any
587 * locks. We also ensure that a PMC's software state is destroyed only
588 * after the PMC is taken off hardware (on all CPUs).
589 *
590 * - Context-switch handling with process-private PMCs needs more
591 * care.
592 *
593 * A given process may be the target of multiple PMCs. For example,
594 * PMCATTACH and PMCDETACH may be requested by a process on one CPU
595 * while the target process is running on another. A PMC could also
596 * be getting released because its owner is exiting. We tackle
597 * these situations in the following manner:
598 *
599 * - each target process structure 'pmc_process' has an array
600 * of 'struct pmc *' pointers, one for each hardware PMC.
601 *
602 * - At context switch IN time, each "target" PMC in RUNNING state
603 * gets started on hardware and a pointer to each PMC is copied into
604 * the per-cpu phw array. The 'runcount' for the PMC is
605 * incremented.
606 *
607 * - At context switch OUT time, all process-virtual PMCs are stopped
608 * on hardware. The saved value is added to the PMCs value field
609 * only if the PMC is in a non-deleted state (the PMCs state could
610 * have changed during the current time slice).
611 *
612 * Note that since in-between a switch IN on a processor and a switch
613 * OUT, the PMC could have been released on another CPU. Therefore
614 * context switch OUT always looks at the hardware state to turn
615 * OFF PMCs and will update a PMC's saved value only if reachable
616 * from the target process record.
617 *
618 * - OP PMCRELEASE could be called on a PMC at any time (the PMC could
619 * be attached to many processes at the time of the call and could
620 * be active on multiple CPUs).
621 *
622 * We prevent further scheduling of the PMC by marking it as in
623 * state 'DELETED'. If the runcount of the PMC is non-zero then
624 * this PMC is currently running on a CPU somewhere. The thread
625 * doing the PMCRELEASE operation waits by repeatedly doing a
626 * pause() till the runcount comes to zero.
627 *
628 * The contents of a PMC descriptor (struct pmc) are protected using
629 * a spin-mutex. In order to save space, we use a mutex pool.
630 *
631 * In terms of lock types used by witness(4), we use:
632 * - Type "pmc-sx", used by the global SX lock.
633 * - Type "pmc-sleep", for sleep mutexes used by logger threads.
634 * - Type "pmc-per-proc", for protecting PMC owner descriptors.
635 * - Type "pmc-leaf", used for all other spin mutexes.
636 */
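/*
 * Illustrative sketch (not part of the driver): the PMCRELEASE wait
 * described above amounts to a loop of roughly the following shape,
 * where 'pm' is the descriptor being released; it is entered only after
 * the PMC has been marked PMC_STATE_DELETED so that the context switch
 * code will not schedule it again.
 */
#if 0
	pm->pm_state = PMC_STATE_DELETED;
	while (pm->pm_runcount > 0)		/* still active on some CPU */
		pmc_force_context_switch();	/* pause(9) for one tick */
#endif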
637
638/*
639 * save the cpu binding of the current kthread
640 */
641
642static void
643pmc_save_cpu_binding(struct pmc_binding *pb)
644{
645 PMCDBG(CPU,BND,2, "%s", "save-cpu");
646 thread_lock(curthread);
647 pb->pb_bound = sched_is_bound(curthread);
648 pb->pb_cpu = curthread->td_oncpu;
649 thread_unlock(curthread);
650 PMCDBG(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu);
651}
652
653/*
654 * restore the cpu binding of the current thread
655 */
656
657static void
658pmc_restore_cpu_binding(struct pmc_binding *pb)
659{
660 PMCDBG(CPU,BND,2, "restore-cpu curcpu=%d restore=%d",
661 curthread->td_oncpu, pb->pb_cpu);
662 thread_lock(curthread);
663 if (pb->pb_bound)
664 sched_bind(curthread, pb->pb_cpu);
665 else
666 sched_unbind(curthread);
667 thread_unlock(curthread);
668 PMCDBG(CPU,BND,2, "%s", "restore-cpu done");
669}
670
671/*
 672 * move execution to the specified cpu and bind it there.
673 */
674
675static void
676pmc_select_cpu(int cpu)
677{
678 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
679 ("[pmc,%d] bad cpu number %d", __LINE__, cpu));
680
681 /* Never move to an inactive CPU. */
682 KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive "
683 "CPU %d", __LINE__, cpu));
684
685 PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu);
686 thread_lock(curthread);
687 sched_bind(curthread, cpu);
688 thread_unlock(curthread);
689
690 KASSERT(curthread->td_oncpu == cpu,
691 ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__,
692 cpu, curthread->td_oncpu));
693
694 PMCDBG(CPU,SEL,2, "select-cpu cpu=%d ok", cpu);
695}
696
697/*
698 * Force a context switch.
699 *
700 * We do this by pause'ing for 1 tick -- invoking mi_switch() is not
701 * guaranteed to force a context switch.
702 */
703
704static void
705pmc_force_context_switch(void)
706{
707
708 pause("pmcctx", 1);
709}
710
711/*
712 * Get the file name for an executable. This is a simple wrapper
713 * around vn_fullpath(9).
714 */
715
716static void
717pmc_getfilename(struct vnode *v, char **fullpath, char **freepath)
718{
719
720 *fullpath = "unknown";
721 *freepath = NULL;
722 vn_fullpath(curthread, v, fullpath, freepath);
723}
724
725/*
 726 * remove a process owning PMCs
727 */
728
729void
730pmc_remove_owner(struct pmc_owner *po)
731{
732 struct pmc *pm, *tmp;
733
734 sx_assert(&pmc_sx, SX_XLOCKED);
735
736 PMCDBG(OWN,ORM,1, "remove-owner po=%p", po);
737
738 /* Remove descriptor from the owner hash table */
739 LIST_REMOVE(po, po_next);
740
741 /* release all owned PMC descriptors */
742 LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) {
743 PMCDBG(OWN,ORM,2, "pmc=%p", pm);
744 KASSERT(pm->pm_owner == po,
745 ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po));
746
747 pmc_release_pmc_descriptor(pm); /* will unlink from the list */
748 }
749
750 KASSERT(po->po_sscount == 0,
751 ("[pmc,%d] SS count not zero", __LINE__));
752 KASSERT(LIST_EMPTY(&po->po_pmcs),
753 ("[pmc,%d] PMC list not empty", __LINE__));
754
755 /* de-configure the log file if present */
756 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
757 pmclog_deconfigure_log(po);
758}
759
760/*
761 * remove an owner process record if all conditions are met.
762 */
763
764static void
765pmc_maybe_remove_owner(struct pmc_owner *po)
766{
767
768 PMCDBG(OWN,OMR,1, "maybe-remove-owner po=%p", po);
769
770 /*
771 * Remove owner record if
772 * - this process does not own any PMCs
 773 * - this process does not own a log file (no system-wide sampling buffer)
774 */
775
776 if (LIST_EMPTY(&po->po_pmcs) &&
777 ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) {
778 pmc_remove_owner(po);
779 pmc_destroy_owner_descriptor(po);
780 }
781}
782
783/*
784 * Add an association between a target process and a PMC.
785 */
786
787static void
788pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
789{
790 int ri;
791 struct pmc_target *pt;
792
793 sx_assert(&pmc_sx, SX_XLOCKED);
794
795 KASSERT(pm != NULL && pp != NULL,
796 ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));
797 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
798 ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d",
799 __LINE__, pm, pp->pp_proc->p_pid));
800 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1),
801 ("[pmc,%d] Illegal reference count %d for process record %p",
802 __LINE__, pp->pp_refcnt, (void *) pp));
803
804 ri = PMC_TO_ROWINDEX(pm);
805
806 PMCDBG(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p",
807 pm, ri, pp);
808
809#ifdef DEBUG
810 LIST_FOREACH(pt, &pm->pm_targets, pt_next)
811 if (pt->pt_process == pp)
812 KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets",
813 __LINE__, pp, pm));
814#endif
815
816 pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO);
817 pt->pt_process = pp;
818
819 LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next);
820
821 atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc,
822 (uintptr_t)pm);
823
824 if (pm->pm_owner->po_owner == pp->pp_proc)
825 pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER;
826
827 /*
828 * Initialize the per-process values at this row index.
829 */
830 pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ?
831 pm->pm_sc.pm_reloadcount : 0;
832
833 pp->pp_refcnt++;
834
835}
836
837/*
838 * Removes the association between a target process and a PMC.
839 */
840
841static void
842pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
843{
844 int ri;
845 struct proc *p;
846 struct pmc_target *ptgt;
847
848 sx_assert(&pmc_sx, SX_XLOCKED);
849
850 KASSERT(pm != NULL && pp != NULL,
851 ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));
852
853 KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc,
854 ("[pmc,%d] Illegal ref count %d on process record %p",
855 __LINE__, pp->pp_refcnt, (void *) pp));
856
857 ri = PMC_TO_ROWINDEX(pm);
858
859 PMCDBG(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p",
860 pm, ri, pp);
861
862 KASSERT(pp->pp_pmcs[ri].pp_pmc == pm,
863 ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__,
864 ri, pm, pp->pp_pmcs[ri].pp_pmc));
865
866 pp->pp_pmcs[ri].pp_pmc = NULL;
867 pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;
868
869 /* Remove owner-specific flags */
870 if (pm->pm_owner->po_owner == pp->pp_proc) {
871 pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
872 pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER;
873 }
874
875 pp->pp_refcnt--;
876
877 /* Remove the target process from the PMC structure */
878 LIST_FOREACH(ptgt, &pm->pm_targets, pt_next)
879 if (ptgt->pt_process == pp)
880 break;
881
882 KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found "
883 "in pmc %p", __LINE__, pp->pp_proc, pp, pm));
884
885 LIST_REMOVE(ptgt, pt_next);
886 free(ptgt, M_PMC);
887
888 /* if the PMC now lacks targets, send the owner a SIGIO */
889 if (LIST_EMPTY(&pm->pm_targets)) {
890 p = pm->pm_owner->po_owner;
891 PROC_LOCK(p);
892 kern_psignal(p, SIGIO);
893 PROC_UNLOCK(p);
894
895 PMCDBG(PRC,SIG,2, "signalling proc=%p signal=%d", p,
896 SIGIO);
897 }
898}
899
900/*
901 * Check if PMC 'pm' may be attached to target process 't'.
902 */
903
904static int
905pmc_can_attach(struct pmc *pm, struct proc *t)
906{
907 struct proc *o; /* pmc owner */
908 struct ucred *oc, *tc; /* owner, target credentials */
909 int decline_attach, i;
910
911 /*
912 * A PMC's owner can always attach that PMC to itself.
913 */
914
915 if ((o = pm->pm_owner->po_owner) == t)
916 return 0;
917
918 PROC_LOCK(o);
919 oc = o->p_ucred;
920 crhold(oc);
921 PROC_UNLOCK(o);
922
923 PROC_LOCK(t);
924 tc = t->p_ucred;
925 crhold(tc);
926 PROC_UNLOCK(t);
927
928 /*
929 * The effective uid of the PMC owner should match at least one
930 * of the {effective,real,saved} uids of the target process.
931 */
932
933 decline_attach = oc->cr_uid != tc->cr_uid &&
934 oc->cr_uid != tc->cr_svuid &&
935 oc->cr_uid != tc->cr_ruid;
936
937 /*
 938 * Every one of the target's group ids must be in the owner's
939 * group list.
940 */
941 for (i = 0; !decline_attach && i < tc->cr_ngroups; i++)
942 decline_attach = !groupmember(tc->cr_groups[i], oc);
943
 944	/* check the real and saved gids too */
945 if (decline_attach == 0)
946 decline_attach = !groupmember(tc->cr_rgid, oc) ||
947 !groupmember(tc->cr_svgid, oc);
948
949 crfree(tc);
950 crfree(oc);
951
952 return !decline_attach;
953}
954
955/*
956 * Attach a process to a PMC.
957 */
958
959static int
960pmc_attach_one_process(struct proc *p, struct pmc *pm)
961{
962 int ri;
963 char *fullpath, *freepath;
964 struct pmc_process *pp;
965
966 sx_assert(&pmc_sx, SX_XLOCKED);
967
968 PMCDBG(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm,
969 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
970
971 /*
972 * Locate the process descriptor corresponding to process 'p',
973 * allocating space as needed.
974 *
975 * Verify that rowindex 'pm_rowindex' is free in the process
976 * descriptor.
977 *
978 * If not, allocate space for a descriptor and link the
979 * process descriptor and PMC.
980 */
981 ri = PMC_TO_ROWINDEX(pm);
982
983 if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
984 return ENOMEM;
985
986 if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */
987 return EEXIST;
988
989 if (pp->pp_pmcs[ri].pp_pmc != NULL)
990 return EBUSY;
991
992 pmc_link_target_process(pm, pp);
993
994 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) &&
995 (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0)
996 pm->pm_flags |= PMC_F_NEEDS_LOGFILE;
997
998 pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */
999
1000 /* issue an attach event to a configured log file */
1001 if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) {
1002 pmc_getfilename(p->p_textvp, &fullpath, &freepath);
1003 if (p->p_flag & P_KTHREAD) {
1004 fullpath = kernelname;
1005 freepath = NULL;
1006 } else
1007 pmclog_process_pmcattach(pm, p->p_pid, fullpath);
1008 if (freepath)
1009 free(freepath, M_TEMP);
1010 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1011 pmc_log_process_mappings(pm->pm_owner, p);
1012 }
1013 /* mark process as using HWPMCs */
1014 PROC_LOCK(p);
1015 p->p_flag |= P_HWPMC;
1016 PROC_UNLOCK(p);
1017
1018 return 0;
1019}
1020
1021/*
 1022 * Attach a process and optionally its children.
1023 */
1024
1025static int
1026pmc_attach_process(struct proc *p, struct pmc *pm)
1027{
1028 int error;
1029 struct proc *top;
1030
1031 sx_assert(&pmc_sx, SX_XLOCKED);
1032
1033 PMCDBG(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm,
1034 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
1035
1036
1037 /*
1038 * If this PMC successfully allowed a GETMSR operation
1039 * in the past, disallow further ATTACHes.
1040 */
1041
1042 if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0)
1043 return EPERM;
1044
1045 if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
1046 return pmc_attach_one_process(p, pm);
1047
1048 /*
1049 * Traverse all child processes, attaching them to
1050 * this PMC.
1051 */
1052
1053 sx_slock(&proctree_lock);
1054
1055 top = p;
1056
1057 for (;;) {
1058 if ((error = pmc_attach_one_process(p, pm)) != 0)
1059 break;
1060 if (!LIST_EMPTY(&p->p_children))
1061 p = LIST_FIRST(&p->p_children);
1062 else for (;;) {
1063 if (p == top)
1064 goto done;
1065 if (LIST_NEXT(p, p_sibling)) {
1066 p = LIST_NEXT(p, p_sibling);
1067 break;
1068 }
1069 p = p->p_pptr;
1070 }
1071 }
1072
1073 if (error)
1074 (void) pmc_detach_process(top, pm);
1075
1076 done:
1077 sx_sunlock(&proctree_lock);
1078 return error;
1079}
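/*
 * Illustrative sketch (not part of the driver): the loop above walks the
 * subtree rooted at 'top' in pre-order using only the p_children and
 * p_sibling lists, with proctree_lock held shared.  The same idiom is used
 * by pmc_detach_process() and pmc_log_all_process_mappings() below; a
 * hypothetical generic form with a callback would be:
 */
#if 0
static void
example_walk_subtree(struct proc *top, void (*visit)(struct proc *))
{
	struct proc *p;

	sx_assert(&proctree_lock, SX_LOCKED);
	p = top;
	for (;;) {
		visit(p);
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return;
			if (LIST_NEXT(p, p_sibling) != NULL) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
}
#endif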
1080
1081/*
1082 * Detach a process from a PMC. If there are no other PMCs tracking
1083 * this process, remove the process structure from its hash table. If
1084 * 'flags' contains PMC_FLAG_REMOVE, then free the process structure.
1085 */
1086
1087static int
1088pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags)
1089{
1090 int ri;
1091 struct pmc_process *pp;
1092
1093 sx_assert(&pmc_sx, SX_XLOCKED);
1094
1095 KASSERT(pm != NULL,
1096 ("[pmc,%d] null pm pointer", __LINE__));
1097
1098 ri = PMC_TO_ROWINDEX(pm);
1099
1100 PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
1101 pm, ri, p, p->p_pid, p->p_comm, flags);
1102
1103 if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
1104 return ESRCH;
1105
1106 if (pp->pp_pmcs[ri].pp_pmc != pm)
1107 return EINVAL;
1108
1109 pmc_unlink_target_process(pm, pp);
1110
1111 /* Issue a detach entry if a log file is configured */
1112 if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE)
1113 pmclog_process_pmcdetach(pm, p->p_pid);
1114
1115 /*
 1116	 * If there are no PMCs targeting this process, we remove its
1117 * descriptor from the target hash table and unset the P_HWPMC
1118 * flag in the struct proc.
1119 */
1120 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc,
1121 ("[pmc,%d] Illegal refcnt %d for process struct %p",
1122 __LINE__, pp->pp_refcnt, pp));
1123
1124 if (pp->pp_refcnt != 0) /* still a target of some PMC */
1125 return 0;
1126
1127 pmc_remove_process_descriptor(pp);
1128
1129 if (flags & PMC_FLAG_REMOVE)
1130 free(pp, M_PMC);
1131
1132 PROC_LOCK(p);
1133 p->p_flag &= ~P_HWPMC;
1134 PROC_UNLOCK(p);
1135
1136 return 0;
1137}
1138
1139/*
1140 * Detach a process and optionally its descendants from a PMC.
1141 */
1142
1143static int
1144pmc_detach_process(struct proc *p, struct pmc *pm)
1145{
1146 struct proc *top;
1147
1148 sx_assert(&pmc_sx, SX_XLOCKED);
1149
1150 PMCDBG(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm,
1151 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
1152
1153 if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
1154 return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
1155
1156 /*
1157 * Traverse all children, detaching them from this PMC. We
1158 * ignore errors since we could be detaching a PMC from a
1159 * partially attached proc tree.
1160 */
1161
1162 sx_slock(&proctree_lock);
1163
1164 top = p;
1165
1166 for (;;) {
1167 (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
1168
1169 if (!LIST_EMPTY(&p->p_children))
1170 p = LIST_FIRST(&p->p_children);
1171 else for (;;) {
1172 if (p == top)
1173 goto done;
1174 if (LIST_NEXT(p, p_sibling)) {
1175 p = LIST_NEXT(p, p_sibling);
1176 break;
1177 }
1178 p = p->p_pptr;
1179 }
1180 }
1181
1182 done:
1183 sx_sunlock(&proctree_lock);
1184
1185 if (LIST_EMPTY(&pm->pm_targets))
1186 pm->pm_flags &= ~PMC_F_ATTACH_DONE;
1187
1188 return 0;
1189}
1190
1191
1192/*
1193 * Thread context switch IN
1194 */
1195
1196static void
1197pmc_process_csw_in(struct thread *td)
1198{
1199 int cpu;
1200 unsigned int adjri, ri;
1201 struct pmc *pm;
1202 struct proc *p;
1203 struct pmc_cpu *pc;
1204 struct pmc_hw *phw;
1205 pmc_value_t newvalue;
1206 struct pmc_process *pp;
1207 struct pmc_classdep *pcd;
1208
1209 p = td->td_proc;
1210
1211 if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
1212 return;
1213
1214 KASSERT(pp->pp_proc == td->td_proc,
1215 ("[pmc,%d] not my thread state", __LINE__));
1216
1217 critical_enter(); /* no preemption from this point */
1218
1219 cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
1220
1221 PMCDBG(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
1222 p->p_pid, p->p_comm, pp);
1223
1224 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 1225	    ("[pmc,%d] weird CPU id %d", __LINE__, cpu));
1226
1227 pc = pmc_pcpu[cpu];
1228
1229 for (ri = 0; ri < md->pmd_npmc; ri++) {
1230
1231 if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
1232 continue;
1233
1234 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
1235 ("[pmc,%d] Target PMC in non-virtual mode (%d)",
1236 __LINE__, PMC_TO_MODE(pm)));
1237
1238 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
1239 ("[pmc,%d] Row index mismatch pmc %d != ri %d",
1240 __LINE__, PMC_TO_ROWINDEX(pm), ri));
1241
1242 /*
1243 * Only PMCs that are marked as 'RUNNING' need
1244 * be placed on hardware.
1245 */
1246
1247 if (pm->pm_state != PMC_STATE_RUNNING)
1248 continue;
1249
1250 /* increment PMC runcount */
1251 atomic_add_rel_int(&pm->pm_runcount, 1);
1252
1253 /* configure the HWPMC we are going to use. */
1254 pcd = pmc_ri_to_classdep(md, ri, &adjri);
1255 pcd->pcd_config_pmc(cpu, adjri, pm);
1256
1257 phw = pc->pc_hwpmcs[ri];
1258
1259 KASSERT(phw != NULL,
1260 ("[pmc,%d] null hw pointer", __LINE__));
1261
1262 KASSERT(phw->phw_pmc == pm,
1263 ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__,
1264 phw->phw_pmc, pm));
1265
1266 /*
1267 * Write out saved value and start the PMC.
1268 *
1269 * Sampling PMCs use a per-process value, while
1270 * counting mode PMCs use a per-pmc value that is
1271 * inherited across descendants.
1272 */
1273 if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
1274 mtx_pool_lock_spin(pmc_mtxpool, pm);
1275 newvalue = PMC_PCPU_SAVED(cpu,ri) =
1276 pp->pp_pmcs[ri].pp_pmcval;
1277 mtx_pool_unlock_spin(pmc_mtxpool, pm);
1278 } else {
1279 KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
1280 ("[pmc,%d] illegal mode=%d", __LINE__,
1281 PMC_TO_MODE(pm)));
1282 mtx_pool_lock_spin(pmc_mtxpool, pm);
1283 newvalue = PMC_PCPU_SAVED(cpu, ri) =
1284 pm->pm_gv.pm_savedvalue;
1285 mtx_pool_unlock_spin(pmc_mtxpool, pm);
1286 }
1287
1288 PMCDBG(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue);
1289
1290 pcd->pcd_write_pmc(cpu, adjri, newvalue);
1291 pcd->pcd_start_pmc(cpu, adjri);
1292 }
1293
1294 /*
1295 * perform any other architecture/cpu dependent thread
1296 * switch-in actions.
1297 */
1298
1299 (void) (*md->pmd_switch_in)(pc, pp);
1300
1301 critical_exit();
1302
1303}
1304
1305/*
1306 * Thread context switch OUT.
1307 */
1308
1309static void
1310pmc_process_csw_out(struct thread *td)
1311{
1312 int cpu;
1313 int64_t tmp;
1314 struct pmc *pm;
1315 struct proc *p;
1316 enum pmc_mode mode;
1317 struct pmc_cpu *pc;
1318 pmc_value_t newvalue;
1319 unsigned int adjri, ri;
1320 struct pmc_process *pp;
1321 struct pmc_classdep *pcd;
1322
1323
1324 /*
1325 * Locate our process descriptor; this may be NULL if
1326 * this process is exiting and we have already removed
1327 * the process from the target process table.
1328 *
1329 * Note that due to kernel preemption, multiple
1330 * context switches may happen while the process is
1331 * exiting.
1332 *
1333 * Note also that if the target process cannot be
1334 * found we still need to deconfigure any PMCs that
1335 * are currently running on hardware.
1336 */
1337
1338 p = td->td_proc;
1339 pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE);
1340
1341 /*
1342 * save PMCs
1343 */
1344
1345 critical_enter();
1346
1347 cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
1348
1349 PMCDBG(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
1350 p->p_pid, p->p_comm, pp);
1351
1352 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 1353	    ("[pmc,%d] weird CPU id %d", __LINE__, cpu));
1354
1355 pc = pmc_pcpu[cpu];
1356
1357 /*
 1358	 * When a PMC gets unlinked from a target process, it will
 1359	 * be removed from the target's pp_pmcs[] array.
1360 *
1361 * However, on a MP system, the target could have been
1362 * executing on another CPU at the time of the unlink.
1363 * So, at context switch OUT time, we need to look at
1364 * the hardware to determine if a PMC is scheduled on
1365 * it.
1366 */
1367
1368 for (ri = 0; ri < md->pmd_npmc; ri++) {
1369
1370 pcd = pmc_ri_to_classdep(md, ri, &adjri);
1371 pm = NULL;
1372 (void) (*pcd->pcd_get_config)(cpu, adjri, &pm);
1373
1374 if (pm == NULL) /* nothing at this row index */
1375 continue;
1376
1377 mode = PMC_TO_MODE(pm);
1378 if (!PMC_IS_VIRTUAL_MODE(mode))
1379 continue; /* not a process virtual PMC */
1380
1381 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
1382 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
1383 __LINE__, PMC_TO_ROWINDEX(pm), ri));
1384
1385 /* Stop hardware if not already stopped */
1386 if (pm->pm_stalled == 0)
1387 pcd->pcd_stop_pmc(cpu, adjri);
1388
1389 /* reduce this PMC's runcount */
1390 atomic_subtract_rel_int(&pm->pm_runcount, 1);
1391
1392 /*
1393 * If this PMC is associated with this process,
1394 * save the reading.
1395 */
1396
1397 if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) {
1398
1399 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
1400 ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__,
1401 pm, ri, pp->pp_pmcs[ri].pp_pmc));
1402
1403 KASSERT(pp->pp_refcnt > 0,
1404 ("[pmc,%d] pp refcnt = %d", __LINE__,
1405 pp->pp_refcnt));
1406
1407 pcd->pcd_read_pmc(cpu, adjri, &newvalue);
1408
1409 tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
1410
1411 PMCDBG(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd", cpu, ri,
1412 tmp);
1413
1414 if (mode == PMC_MODE_TS) {
1415
1416 /*
1417 * For sampling process-virtual PMCs,
1418 * we expect the count to be
1419 * decreasing as the 'value'
1420 * programmed into the PMC is the
1421 * number of events to be seen till
1422 * the next sampling interrupt.
1423 */
1424 if (tmp < 0)
1425 tmp += pm->pm_sc.pm_reloadcount;
1426 mtx_pool_lock_spin(pmc_mtxpool, pm);
1427 pp->pp_pmcs[ri].pp_pmcval -= tmp;
1428 if ((int64_t) pp->pp_pmcs[ri].pp_pmcval < 0)
1429 pp->pp_pmcs[ri].pp_pmcval +=
1430 pm->pm_sc.pm_reloadcount;
1431 mtx_pool_unlock_spin(pmc_mtxpool, pm);
1432
1433 } else {
1434
1435 /*
1436 * For counting process-virtual PMCs,
1437 * we expect the count to be
1438 * increasing monotonically, modulo a 64
1439 * bit wraparound.
1440 */
1441 KASSERT((int64_t) tmp >= 0,
1442 ("[pmc,%d] negative increment cpu=%d "
1443 "ri=%d newvalue=%jx saved=%jx "
1444 "incr=%jx", __LINE__, cpu, ri,
1445 newvalue, PMC_PCPU_SAVED(cpu,ri), tmp));
1446
1447 mtx_pool_lock_spin(pmc_mtxpool, pm);
1448 pm->pm_gv.pm_savedvalue += tmp;
1449 pp->pp_pmcs[ri].pp_pmcval += tmp;
1450 mtx_pool_unlock_spin(pmc_mtxpool, pm);
1451
1452 if (pm->pm_flags & PMC_F_LOG_PROCCSW)
1453 pmclog_process_proccsw(pm, pp, tmp);
1454 }
1455 }
1456
1457 /* mark hardware as free */
1458 pcd->pcd_config_pmc(cpu, adjri, NULL);
1459 }
1460
1461 /*
1462 * perform any other architecture/cpu dependent thread
1463 * switch out functions.
1464 */
1465
1466 (void) (*md->pmd_switch_out)(pc, pp);
1467
1468 critical_exit();
1469}
1470
1471/*
1472 * Log a KLD operation.
1473 */
1474
1475static void
1476pmc_process_kld_load(struct pmckern_map_in *pkm)
1477{
1478 struct pmc_owner *po;
1479
1480 sx_assert(&pmc_sx, SX_LOCKED);
1481
1482 /*
1483 * Notify owners of system sampling PMCs about KLD operations.
1484 */
1485
1486 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1487 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1488 pmclog_process_map_in(po, (pid_t) -1, pkm->pm_address,
1489 (char *) pkm->pm_file);
1490
1491 /*
1492 * TODO: Notify owners of (all) process-sampling PMCs too.
1493 */
1494
1495 return;
1496}
1497
1498static void
1499pmc_process_kld_unload(struct pmckern_map_out *pkm)
1500{
1501 struct pmc_owner *po;
1502
1503 sx_assert(&pmc_sx, SX_LOCKED);
1504
1505 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1506 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1507 pmclog_process_map_out(po, (pid_t) -1,
1508 pkm->pm_address, pkm->pm_address + pkm->pm_size);
1509
1510 /*
1511 * TODO: Notify owners of process-sampling PMCs.
1512 */
1513}
1514
1515/*
1516 * A mapping change for a process.
1517 */
1518
1519static void
1520pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm)
1521{
1522 int ri;
1523 pid_t pid;
1524 char *fullpath, *freepath;
1525 const struct pmc *pm;
1526 struct pmc_owner *po;
1527 const struct pmc_process *pp;
1528
1529 freepath = fullpath = NULL;
1530 pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
1531
1532 pid = td->td_proc->p_pid;
1533
1534 /* Inform owners of all system-wide sampling PMCs. */
1535 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1536 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1537 pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
1538
1539 if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
1540 goto done;
1541
1542 /*
1543 * Inform sampling PMC owners tracking this process.
1544 */
1545 for (ri = 0; ri < md->pmd_npmc; ri++)
1546 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
1547 PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1548 pmclog_process_map_in(pm->pm_owner,
1549 pid, pkm->pm_address, fullpath);
1550
1551 done:
1552 if (freepath)
1553 free(freepath, M_TEMP);
1554}
1555
1556
1557/*
1558 * Log an munmap request.
1559 */
1560
1561static void
1562pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm)
1563{
1564 int ri;
1565 pid_t pid;
1566 struct pmc_owner *po;
1567 const struct pmc *pm;
1568 const struct pmc_process *pp;
1569
1570 pid = td->td_proc->p_pid;
1571
1572 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1573 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1574 pmclog_process_map_out(po, pid, pkm->pm_address,
1575 pkm->pm_address + pkm->pm_size);
1576
1577 if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
1578 return;
1579
1580 for (ri = 0; ri < md->pmd_npmc; ri++)
1581 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
1582 PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1583 pmclog_process_map_out(pm->pm_owner, pid,
1584 pkm->pm_address, pkm->pm_address + pkm->pm_size);
1585}
1586
1587/*
1588 * Log mapping information about the kernel.
1589 */
1590
1591static void
1592pmc_log_kernel_mappings(struct pmc *pm)
1593{
1594 struct pmc_owner *po;
1595 struct pmckern_map_in *km, *kmbase;
1596
1597 sx_assert(&pmc_sx, SX_LOCKED);
1598 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
1599 ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
1600 __LINE__, (void *) pm));
1601
1602 po = pm->pm_owner;
1603
1604 if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE)
1605 return;
1606
1607 /*
1608 * Log the current set of kernel modules.
1609 */
1610 kmbase = linker_hwpmc_list_objects();
1611 for (km = kmbase; km->pm_file != NULL; km++) {
1612 PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
1613 (void *) km->pm_address);
1614 pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
1615 km->pm_file);
1616 }
1617 free(kmbase, M_LINKER);
1618
1619 po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE;
1620}
1621
1622/*
1623 * Log the mappings for a single process.
1624 */
1625
1626static void
1627pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
1628{
1629 int locked;
1630 vm_map_t map;
1631 struct vnode *vp;
1632 struct vmspace *vm;
1633 vm_map_entry_t entry;
1634 vm_offset_t last_end;
1635 u_int last_timestamp;
1636 struct vnode *last_vp;
1637 vm_offset_t start_addr;
1638 vm_object_t obj, lobj, tobj;
1639 char *fullpath, *freepath;
1640
1641 last_vp = NULL;
1642 last_end = (vm_offset_t) 0;
1643 fullpath = freepath = NULL;
1644
1645 if ((vm = vmspace_acquire_ref(p)) == NULL)
1646 return;
1647
1648 map = &vm->vm_map;
1649 vm_map_lock_read(map);
1650
1651 for (entry = map->header.next; entry != &map->header; entry = entry->next) {
1652
1653 if (entry == NULL) {
1654 PMCDBG(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly "
1655 "NULL! pid=%d vm_map=%p\n", p->p_pid, map);
1656 break;
1657 }
1658
1659 /*
1660 * We only care about executable map entries.
1661 */
1662 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
1663 !(entry->protection & VM_PROT_EXECUTE) ||
1664 (entry->object.vm_object == NULL)) {
1665 continue;
1666 }
1667
1668 obj = entry->object.vm_object;
1669 VM_OBJECT_LOCK(obj);
1670
1671 /*
1672 * Walk the backing_object list to find the base
1673 * (non-shadowed) vm_object.
1674 */
1675 for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) {
1676 if (tobj != obj)
1677 VM_OBJECT_LOCK(tobj);
1678 if (lobj != obj)
1679 VM_OBJECT_UNLOCK(lobj);
1680 lobj = tobj;
1681 }
1682
1683 /*
1684 * At this point lobj is the base vm_object and it is locked.
1685 */
1686 if (lobj == NULL) {
1687 PMCDBG(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! pid=%d "
1688 "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj);
1689 VM_OBJECT_UNLOCK(obj);
1690 continue;
1691 }
1692
1693 if (lobj->type != OBJT_VNODE || lobj->handle == NULL) {
1694 if (lobj != obj)
1695 VM_OBJECT_UNLOCK(lobj);
1696 VM_OBJECT_UNLOCK(obj);
1697 continue;
1698 }
1699
1700 /*
1701 * Skip contiguous regions that point to the same
1702 * vnode, so we don't emit redundant MAP-IN
1703 * directives.
1704 */
1705 if (entry->start == last_end && lobj->handle == last_vp) {
1706 last_end = entry->end;
1707 if (lobj != obj)
1708 VM_OBJECT_UNLOCK(lobj);
1709 VM_OBJECT_UNLOCK(obj);
1710 continue;
1711 }
1712
1713 /*
1714 * We don't want to keep the proc's vm_map or this
1715 * vm_object locked while we walk the pathname, since
1716 * vn_fullpath() can sleep. However, if we drop the
1717 * lock, it's possible for concurrent activity to
1718 * modify the vm_map list. To protect against this,
1719 * we save the vm_map timestamp before we release the
1720 * lock, and check it after we reacquire the lock
1721 * below.
1722 */
1723 start_addr = entry->start;
1724 last_end = entry->end;
1725 last_timestamp = map->timestamp;
1726 vm_map_unlock_read(map);
1727
1728 vp = lobj->handle;
1729 vref(vp);
1730 if (lobj != obj)
1731 VM_OBJECT_UNLOCK(lobj);
1732
1733 VM_OBJECT_UNLOCK(obj);
1734
1735 freepath = NULL;
1736 pmc_getfilename(vp, &fullpath, &freepath);
1737 last_vp = vp;
1738
1739 locked = VFS_LOCK_GIANT(vp->v_mount);
1740 vrele(vp);
1741 VFS_UNLOCK_GIANT(locked);
1742
1743 vp = NULL;
1744 pmclog_process_map_in(po, p->p_pid, start_addr, fullpath);
1745 if (freepath)
1746 free(freepath, M_TEMP);
1747
1748 vm_map_lock_read(map);
1749
1750 /*
1751 * If our saved timestamp doesn't match, this means
1752 * that the vm_map was modified out from under us and
1753 * we can't trust our current "entry" pointer. Do a
1754 * new lookup for this entry. If there is no entry
1755 * for this address range, vm_map_lookup_entry() will
1756 * return the previous one, so we always want to go to
1757 * entry->next on the next loop iteration.
1758 *
1759 * There is an edge condition here that can occur if
1760 * there is no entry at or before this address. In
1761 * this situation, vm_map_lookup_entry returns
1762 * &map->header, which would cause our loop to abort
1763 * without processing the rest of the map. However,
1764 * in practice this will never happen for process
1765 * vm_map. This is because the executable's text
1766 * segment is the first mapping in the proc's address
1767 * space, and this mapping is never removed until the
1768 * process exits, so there will always be a non-header
1769 * entry at or before the requested address for
1770 * vm_map_lookup_entry to return.
1771 */
1772 if (map->timestamp != last_timestamp)
1773 vm_map_lookup_entry(map, last_end - 1, &entry);
1774 }
1775
1776 vm_map_unlock_read(map);
1777 vmspace_free(vm);
1778 return;
1779}
1780
1781/*
1782 * Log mappings for all processes in the system.
1783 */
1784
1785static void
1786pmc_log_all_process_mappings(struct pmc_owner *po)
1787{
1788 struct proc *p, *top;
1789
1790 sx_assert(&pmc_sx, SX_XLOCKED);
1791
1792 if ((p = pfind(1)) == NULL)
1793 panic("[pmc,%d] Cannot find init", __LINE__);
1794
1795 PROC_UNLOCK(p);
1796
1797 sx_slock(&proctree_lock);
1798
1799 top = p;
1800
1801 for (;;) {
1802 pmc_log_process_mappings(po, p);
1803 if (!LIST_EMPTY(&p->p_children))
1804 p = LIST_FIRST(&p->p_children);
1805 else for (;;) {
1806 if (p == top)
1807 goto done;
1808 if (LIST_NEXT(p, p_sibling)) {
1809 p = LIST_NEXT(p, p_sibling);
1810 break;
1811 }
1812 p = p->p_pptr;
1813 }
1814 }
1815 done:
1816 sx_sunlock(&proctree_lock);
1817}
1818
1819/*
1820 * The 'hook' invoked from the kernel proper
1821 */
1822
1823
1824#ifdef DEBUG
1825const char *pmc_hooknames[] = {
1826 /* these strings correspond to PMC_FN_* in <sys/pmckern.h> */
1827 "",
1828 "EXEC",
1829 "CSW-IN",
1830 "CSW-OUT",
1831 "SAMPLE",
1832 "KLDLOAD",
1833 "KLDUNLOAD",
1834 "MMAP",
1835 "MUNMAP",
1836 "CALLCHAIN"
1837};
1838#endif
1839
1840static int
1841pmc_hook_handler(struct thread *td, int function, void *arg)
1842{
1843
1844 PMCDBG(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
1845 pmc_hooknames[function], arg);
1846
1847 switch (function)
1848 {
1849
1850 /*
1851 * Process exec()
1852 */
1853
1854 case PMC_FN_PROCESS_EXEC:
1855 {
1856 char *fullpath, *freepath;
1857 unsigned int ri;
1858 int is_using_hwpmcs;
1859 struct pmc *pm;
1860 struct proc *p;
1861 struct pmc_owner *po;
1862 struct pmc_process *pp;
1863 struct pmckern_procexec *pk;
1864
1865 sx_assert(&pmc_sx, SX_XLOCKED);
1866
1867 p = td->td_proc;
1868 pmc_getfilename(p->p_textvp, &fullpath, &freepath);
1869
1870 pk = (struct pmckern_procexec *) arg;
1871
1872 /* Inform owners of SS mode PMCs of the exec event. */
1873 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1874 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1875 pmclog_process_procexec(po, PMC_ID_INVALID,
1876 p->p_pid, pk->pm_entryaddr, fullpath);
1877
1878 PROC_LOCK(p);
1879 is_using_hwpmcs = p->p_flag & P_HWPMC;
1880 PROC_UNLOCK(p);
1881
1882 if (!is_using_hwpmcs) {
1883 if (freepath)
1884 free(freepath, M_TEMP);
1885 break;
1886 }
1887
1888 /*
1889 * PMCs are not inherited across an exec(): remove any
1890 * PMCs that this process is the owner of.
1891 */
1892
1893 if ((po = pmc_find_owner_descriptor(p)) != NULL) {
1894 pmc_remove_owner(po);
1895 pmc_destroy_owner_descriptor(po);
1896 }
1897
1898 /*
1899 * If the process being exec'ed is not the target of any
1900 * PMC, we are done.
1901 */
1902 if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) {
1903 if (freepath)
1904 free(freepath, M_TEMP);
1905 break;
1906 }
1907
1908 /*
1909 * Log the exec event to all monitoring owners. Skip
 1910		 * owners who have already received the event because
1911 * they had system sampling PMCs active.
1912 */
1913 for (ri = 0; ri < md->pmd_npmc; ri++)
1914 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
1915 po = pm->pm_owner;
1916 if (po->po_sscount == 0 &&
1917 po->po_flags & PMC_PO_OWNS_LOGFILE)
1918 pmclog_process_procexec(po, pm->pm_id,
1919 p->p_pid, pk->pm_entryaddr,
1920 fullpath);
1921 }
1922
1923 if (freepath)
1924 free(freepath, M_TEMP);
1925
1926
1927 PMCDBG(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d",
1928 p, p->p_pid, p->p_comm, pk->pm_credentialschanged);
1929
1930 if (pk->pm_credentialschanged == 0) /* no change */
1931 break;
1932
1933 /*
1934 * If the newly exec()'ed process has a different credential
1935 * than before, allow it to be the target of a PMC only if
1936	 * the PMC's owner has sufficient privilege.
1937 */
1938
1939 for (ri = 0; ri < md->pmd_npmc; ri++)
1940 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL)
1941 if (pmc_can_attach(pm, td->td_proc) != 0)
1942 pmc_detach_one_process(td->td_proc,
1943 pm, PMC_FLAG_NONE);
1944
1945 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc,
1946 ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__,
1947 pp->pp_refcnt, pp));
1948
1949 /*
1950 * If this process is no longer the target of any
1951 * PMCs, we can remove the process entry and free
1952 * up space.
1953 */
1954
1955 if (pp->pp_refcnt == 0) {
1956 pmc_remove_process_descriptor(pp);
1957 free(pp, M_PMC);
1958 break;
1959 }
1960
1961 }
1962 break;
1963
1964 case PMC_FN_CSW_IN:
1965 pmc_process_csw_in(td);
1966 break;
1967
1968 case PMC_FN_CSW_OUT:
1969 pmc_process_csw_out(td);
1970 break;
1971
1972 /*
1973 * Process accumulated PC samples.
1974 *
1975 * This function is expected to be called by hardclock() for
1976 * each CPU that has accumulated PC samples.
1977 *
1978 * This function is to be executed on the CPU whose samples
1979 * are being processed.
1980 */
1981 case PMC_FN_DO_SAMPLES:
1982
1983 /*
1984		 * Clear the CPU-specific bit in the CPU mask before
1985		 * doing the rest of the processing. If the NMI handler
1986		 * gets invoked after the "CPU_CLR_ATOMIC()" call
1987 * below but before "pmc_process_samples()" gets
1988 * around to processing the interrupt, then we will
1989 * come back here at the next hardclock() tick (and
1990 * may find nothing to do if "pmc_process_samples()"
1991 * had already processed the interrupt). We don't
1992 * lose the interrupt sample.
1993 */
1994 CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask);
1995 pmc_process_samples(PCPU_GET(cpuid));
1996 break;
1997
1998
1999 case PMC_FN_KLD_LOAD:
2000 sx_assert(&pmc_sx, SX_LOCKED);
2001 pmc_process_kld_load((struct pmckern_map_in *) arg);
2002 break;
2003
2004 case PMC_FN_KLD_UNLOAD:
2005 sx_assert(&pmc_sx, SX_LOCKED);
2006 pmc_process_kld_unload((struct pmckern_map_out *) arg);
2007 break;
2008
2009 case PMC_FN_MMAP:
2010 sx_assert(&pmc_sx, SX_LOCKED);
2011 pmc_process_mmap(td, (struct pmckern_map_in *) arg);
2012 break;
2013
2014 case PMC_FN_MUNMAP:
2015 sx_assert(&pmc_sx, SX_LOCKED);
2016 pmc_process_munmap(td, (struct pmckern_map_out *) arg);
2017 break;
2018
2019 case PMC_FN_USER_CALLCHAIN:
2020 /*
2021 * Record a call chain.
2022 */
2023 KASSERT(td == curthread, ("[pmc,%d] td != curthread",
2024 __LINE__));
2025 pmc_capture_user_callchain(PCPU_GET(cpuid),
2026 (struct trapframe *) arg);
2027 td->td_pflags &= ~TDP_CALLCHAIN;
2028 break;
2029
2030 default:
2031#ifdef DEBUG
2032 KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
2033#endif
2034 break;
2035
2036 }
2037
2038 return 0;
2039}
2040
2041/*
2042 * allocate a 'struct pmc_owner' descriptor in the owner hash table.
2043 */
2044
2045static struct pmc_owner *
2046pmc_allocate_owner_descriptor(struct proc *p)
2047{
2048 uint32_t hindex;
2049 struct pmc_owner *po;
2050 struct pmc_ownerhash *poh;
2051
2052 hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
2053 poh = &pmc_ownerhash[hindex];
2054
2055	/* Allocate and initialize an owner descriptor. */
2056 po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO);
2057 po->po_sscount = po->po_error = po->po_flags = po->po_logprocmaps = 0;
2058 po->po_file = NULL;
2059 po->po_owner = p;
2060 po->po_kthread = NULL;
2061 LIST_INIT(&po->po_pmcs);
2062 LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */
2063
2064 TAILQ_INIT(&po->po_logbuffers);
2065 mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN);
2066
2067 PMCDBG(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p",
2068 p, p->p_pid, p->p_comm, po);
2069
2070 return po;
2071}
2072
2073static void
2074pmc_destroy_owner_descriptor(struct pmc_owner *po)
2075{
2076
2077 PMCDBG(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)",
2078 po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm);
2079
2080 mtx_destroy(&po->po_mtx);
2081 free(po, M_PMC);
2082}
2083
2084/*
2085 * find the descriptor corresponding to process 'p', adding or removing it
2086 * as specified by 'mode'.
2087 */
2088
2089static struct pmc_process *
2090pmc_find_process_descriptor(struct proc *p, uint32_t mode)
2091{
2092 uint32_t hindex;
2093 struct pmc_process *pp, *ppnew;
2094 struct pmc_processhash *pph;
2095
2096 hindex = PMC_HASH_PTR(p, pmc_processhashmask);
2097 pph = &pmc_processhash[hindex];
2098
2099 ppnew = NULL;
2100
2101 /*
2102 * Pre-allocate memory in the FIND_ALLOCATE case since we
2103 * cannot call malloc(9) once we hold a spin lock.
2104 */
2105 if (mode & PMC_FLAG_ALLOCATE)
2106 ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc *
2107 sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO);
2108
2109 mtx_lock_spin(&pmc_processhash_mtx);
2110 LIST_FOREACH(pp, pph, pp_next)
2111 if (pp->pp_proc == p)
2112 break;
2113
2114 if ((mode & PMC_FLAG_REMOVE) && pp != NULL)
2115 LIST_REMOVE(pp, pp_next);
2116
2117 if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL &&
2118 ppnew != NULL) {
2119 ppnew->pp_proc = p;
2120 LIST_INSERT_HEAD(pph, ppnew, pp_next);
2121 pp = ppnew;
2122 ppnew = NULL;
2123 }
2124 mtx_unlock_spin(&pmc_processhash_mtx);
2125
2126 if (pp != NULL && ppnew != NULL)
2127 free(ppnew, M_PMC);
2128
2129 return pp;
2130}
2131
2132/*
2133 * remove a process descriptor from the process hash table.
2134 */
2135
2136static void
2137pmc_remove_process_descriptor(struct pmc_process *pp)
2138{
2139 KASSERT(pp->pp_refcnt == 0,
2140 ("[pmc,%d] Removing process descriptor %p with count %d",
2141 __LINE__, pp, pp->pp_refcnt));
2142
2143 mtx_lock_spin(&pmc_processhash_mtx);
2144 LIST_REMOVE(pp, pp_next);
2145 mtx_unlock_spin(&pmc_processhash_mtx);
2146}
2147
2148
2149/*
2150 * find an owner descriptor corresponding to proc 'p'
2151 */
2152
2153static struct pmc_owner *
2154pmc_find_owner_descriptor(struct proc *p)
2155{
2156 uint32_t hindex;
2157 struct pmc_owner *po;
2158 struct pmc_ownerhash *poh;
2159
2160 hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
2161 poh = &pmc_ownerhash[hindex];
2162
2163 po = NULL;
2164 LIST_FOREACH(po, poh, po_next)
2165 if (po->po_owner == p)
2166 break;
2167
2168 PMCDBG(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> "
2169 "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po);
2170
2171 return po;
2172}
2173
2174/*
2175 * pmc_allocate_pmc_descriptor
2176 *
2177 * Allocate a pmc descriptor and initialize its
2178 * fields.
2179 */
2180
2181static struct pmc *
2182pmc_allocate_pmc_descriptor(void)
2183{
2184 struct pmc *pmc;
2185
2186 pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO);
2187
2188 if (pmc != NULL) {
2189 pmc->pm_owner = NULL;
2190 LIST_INIT(&pmc->pm_targets);
2191 }
2192
2193 PMCDBG(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc);
2194
2195 return pmc;
2196}
2197
2198/*
2199 * Destroy a pmc descriptor.
2200 */
2201
2202static void
2203pmc_destroy_pmc_descriptor(struct pmc *pm)
2204{
2205 (void) pm;
2206
2207#ifdef DEBUG
2208 KASSERT(pm->pm_state == PMC_STATE_DELETED ||
2209 pm->pm_state == PMC_STATE_FREE,
2210 ("[pmc,%d] destroying non-deleted PMC", __LINE__));
2211 KASSERT(LIST_EMPTY(&pm->pm_targets),
2212 ("[pmc,%d] destroying pmc with targets", __LINE__));
2213 KASSERT(pm->pm_owner == NULL,
2214 ("[pmc,%d] destroying pmc attached to an owner", __LINE__));
2215 KASSERT(pm->pm_runcount == 0,
2216 ("[pmc,%d] pmc has non-zero run count %d", __LINE__,
2217 pm->pm_runcount));
2218#endif
2219}
2220
2221static void
2222pmc_wait_for_pmc_idle(struct pmc *pm)
2223{
2224#ifdef DEBUG
2225 volatile int maxloop;
2226
2227 maxloop = 100 * pmc_cpu_max();
2228#endif
2229
2230 /*
2231 * Loop (with a forced context switch) till the PMC's runcount
2232 * comes down to zero.
2233 */
2234 while (atomic_load_acq_32(&pm->pm_runcount) > 0) {
2235#ifdef DEBUG
2236 maxloop--;
2237 KASSERT(maxloop > 0,
2238 ("[pmc,%d] (ri%d, rc%d) waiting too long for "
2239 "pmc to be free", __LINE__,
2240 PMC_TO_ROWINDEX(pm), pm->pm_runcount));
2241#endif
2242 pmc_force_context_switch();
2243 }
2244}
2245
2246/*
2247 * This function does the following things:
2248 *
2249 * - detaches the PMC from hardware
2250 * - unlinks all target processes that were attached to it
2251 * - removes the PMC from its owner's list
2252 * - destroys the PMC private mutex
2253 *
2254 * Once this function completes, the given pmc pointer can be safely
2255 * FREE'd by the caller.
2256 */
2257
2258static void
2259pmc_release_pmc_descriptor(struct pmc *pm)
2260{
2261 enum pmc_mode mode;
2262 struct pmc_hw *phw;
2263 u_int adjri, ri, cpu;
2264 struct pmc_owner *po;
2265 struct pmc_binding pb;
2266 struct pmc_process *pp;
2267 struct pmc_classdep *pcd;
2268 struct pmc_target *ptgt, *tmp;
2269
2270 sx_assert(&pmc_sx, SX_XLOCKED);
2271
2272 KASSERT(pm, ("[pmc,%d] null pmc", __LINE__));
2273
2274 ri = PMC_TO_ROWINDEX(pm);
2275 pcd = pmc_ri_to_classdep(md, ri, &adjri);
2276 mode = PMC_TO_MODE(pm);
2277
2278 PMCDBG(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri,
2279 mode);
2280
2281 /*
2282 * First, we take the PMC off hardware.
2283 */
2284 cpu = 0;
2285 if (PMC_IS_SYSTEM_MODE(mode)) {
2286
2287 /*
2288 * A system mode PMC runs on a specific CPU. Switch
2289 * to this CPU and turn hardware off.
2290 */
2291 pmc_save_cpu_binding(&pb);
2292
2293 cpu = PMC_TO_CPU(pm);
2294
2295 pmc_select_cpu(cpu);
2296
2297 /* switch off non-stalled CPUs */
2298 if (pm->pm_state == PMC_STATE_RUNNING &&
2299 pm->pm_stalled == 0) {
2300
2301 phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
2302
2303 KASSERT(phw->phw_pmc == pm,
2304 ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)",
2305 __LINE__, ri, phw->phw_pmc, pm));
2306 PMCDBG(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri);
2307
2308 critical_enter();
2309 pcd->pcd_stop_pmc(cpu, adjri);
2310 critical_exit();
2311 }
2312
2313 PMCDBG(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri);
2314
2315 critical_enter();
2316 pcd->pcd_config_pmc(cpu, adjri, NULL);
2317 critical_exit();
2318
2319 /* adjust the global and process count of SS mode PMCs */
2320 if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) {
2321 po = pm->pm_owner;
2322 po->po_sscount--;
2323 if (po->po_sscount == 0) {
2324 atomic_subtract_rel_int(&pmc_ss_count, 1);
2325 LIST_REMOVE(po, po_ssnext);
2326 }
2327 }
2328
2329 pm->pm_state = PMC_STATE_DELETED;
2330
2331 pmc_restore_cpu_binding(&pb);
2332
2333 /*
2334 * We could have references to this PMC structure in
2335 * the per-cpu sample queues. Wait for the queue to
2336 * drain.
2337 */
2338 pmc_wait_for_pmc_idle(pm);
2339
2340 } else if (PMC_IS_VIRTUAL_MODE(mode)) {
2341
2342 /*
2343 * A virtual PMC could be running on multiple CPUs at
2344 * a given instant.
2345 *
2346 * By marking its state as DELETED, we ensure that
2347 * this PMC is never further scheduled on hardware.
2348 *
2349 * Then we wait till all CPUs are done with this PMC.
2350 */
2351 pm->pm_state = PMC_STATE_DELETED;
2352
2353
2354	/* Wait for the PMC's runcount to come to zero. */
2355 pmc_wait_for_pmc_idle(pm);
2356
2357 /*
2358 * At this point the PMC is off all CPUs and cannot be
2359 * freshly scheduled onto a CPU. It is now safe to
2360 * unlink all targets from this PMC. If a
2361 * process-record's refcount falls to zero, we remove
2362 * it from the hash table. The module-wide SX lock
2363 * protects us from races.
2364 */
2365 LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) {
2366 pp = ptgt->pt_process;
2367 pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */
2368
2369 PMCDBG(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt);
2370
2371 /*
2372 * If the target process record shows that no
2373 * PMCs are attached to it, reclaim its space.
2374 */
2375
2376 if (pp->pp_refcnt == 0) {
2377 pmc_remove_process_descriptor(pp);
2378 free(pp, M_PMC);
2379 }
2380 }
2381
2382 cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */
2383
2384 }
2385
2386 /*
2387 * Release any MD resources
2388 */
2389 (void) pcd->pcd_release_pmc(cpu, adjri, pm);
2390
2391 /*
2392 * Update row disposition
2393 */
2394
2395 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm)))
2396 PMC_UNMARK_ROW_STANDALONE(ri);
2397 else
2398 PMC_UNMARK_ROW_THREAD(ri);
2399
2400 /* unlink from the owner's list */
2401 if (pm->pm_owner) {
2402 LIST_REMOVE(pm, pm_next);
2403 pm->pm_owner = NULL;
2404 }
2405
2406 pmc_destroy_pmc_descriptor(pm);
2407}
2408
2409/*
2410 * Register an owner and a pmc.
2411 */
2412
2413static int
2414pmc_register_owner(struct proc *p, struct pmc *pmc)
2415{
2416 struct pmc_owner *po;
2417
2418 sx_assert(&pmc_sx, SX_XLOCKED);
2419
2420 if ((po = pmc_find_owner_descriptor(p)) == NULL)
2421 if ((po = pmc_allocate_owner_descriptor(p)) == NULL)
2422 return ENOMEM;
2423
2424 KASSERT(pmc->pm_owner == NULL,
2425 ("[pmc,%d] attempting to own an initialized PMC", __LINE__));
2426 pmc->pm_owner = po;
2427
2428 LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next);
2429
2430 PROC_LOCK(p);
2431 p->p_flag |= P_HWPMC;
2432 PROC_UNLOCK(p);
2433
2434 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
2435 pmclog_process_pmcallocate(pmc);
2436
2437 PMCDBG(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p",
2438 po, pmc);
2439
2440 return 0;
2441}
2442
2443/*
2444 * Return the current row disposition:
2445 * == 0 => FREE
2446 * > 0 => PROCESS MODE
2447 * < 0 => SYSTEM MODE
2448 */
2449
2450int
2451pmc_getrowdisp(int ri)
2452{
2453 return pmc_pmcdisp[ri];
2454}
2455
2456/*
2457 * Check if a PMC at row index 'ri' can be allocated to the current
2458 * process.
2459 *
2460 * Allocation can fail if:
2461 * - the current process is already being profiled by a PMC at index 'ri',
2462 * attached to it via OP_PMCATTACH.
2463 * - the current process has already allocated a PMC at index 'ri'
2464 * via OP_ALLOCATE.
2465 */
2466
2467static int
2468pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu)
2469{
2470 enum pmc_mode mode;
2471 struct pmc *pm;
2472 struct pmc_owner *po;
2473 struct pmc_process *pp;
2474
2475 PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d "
2476 "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu);
2477
2478 /*
2479 * We shouldn't have already allocated a process-mode PMC at
2480 * row index 'ri'.
2481 *
2482 * We shouldn't have allocated a system-wide PMC on the same
2483 * CPU and same RI.
2484 */
2485 if ((po = pmc_find_owner_descriptor(p)) != NULL)
2486 LIST_FOREACH(pm, &po->po_pmcs, pm_next) {
2487 if (PMC_TO_ROWINDEX(pm) == ri) {
2488 mode = PMC_TO_MODE(pm);
2489 if (PMC_IS_VIRTUAL_MODE(mode))
2490 return EEXIST;
2491 if (PMC_IS_SYSTEM_MODE(mode) &&
2492 (int) PMC_TO_CPU(pm) == cpu)
2493 return EEXIST;
2494 }
2495 }
2496
2497 /*
2498 * We also shouldn't be the target of any PMC at this index
2499 * since otherwise a PMC_ATTACH to ourselves will fail.
2500 */
2501 if ((pp = pmc_find_process_descriptor(p, 0)) != NULL)
2502 if (pp->pp_pmcs[ri].pp_pmc)
2503 return EEXIST;
2504
2505 PMCDBG(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok",
2506 p, p->p_pid, p->p_comm, ri);
2507
2508 return 0;
2509}
2510
2511/*
2512 * Check if a given PMC at row index 'ri' can be currently used in
2513 * mode 'mode'.
2514 */
2515
2516static int
2517pmc_can_allocate_row(int ri, enum pmc_mode mode)
2518{
2519 enum pmc_disp disp;
2520
2521 sx_assert(&pmc_sx, SX_XLOCKED);
2522
2523 PMCDBG(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode);
2524
2525 if (PMC_IS_SYSTEM_MODE(mode))
2526 disp = PMC_DISP_STANDALONE;
2527 else
2528 disp = PMC_DISP_THREAD;
2529
2530 /*
2531 * check disposition for PMC row 'ri':
2532 *
2533 * Expected disposition Row-disposition Result
2534 *
2535 * STANDALONE STANDALONE or FREE proceed
2536 * STANDALONE THREAD fail
2537 * THREAD THREAD or FREE proceed
2538 * THREAD STANDALONE fail
2539 */
2540
2541 if (!PMC_ROW_DISP_IS_FREE(ri) &&
2542 !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) &&
2543 !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri)))
2544 return EBUSY;
2545
2546 /*
2547 * All OK
2548 */
2549
2550 PMCDBG(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode);
2551
2552 return 0;
2553
2554}
2555
2556/*
2557 * Find a PMC descriptor with user handle 'pmcid' among the PMCs owned by 'po'.
2558 */
2559
2560static struct pmc *
2561pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid)
2562{
2563 struct pmc *pm;
2564
2565 KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc,
2566 ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__,
2567 PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc));
2568
2569 LIST_FOREACH(pm, &po->po_pmcs, pm_next)
2570 if (pm->pm_id == pmcid)
2571 return pm;
2572
2573 return NULL;
2574}
2575
2576static int
2577pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc)
2578{
2579
2580 struct pmc *pm;
2581 struct pmc_owner *po;
2582
2583 PMCDBG(PMC,FND,1, "find-pmc id=%d", pmcid);
2584
2585 if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL)
2586 return ESRCH;
2587
2588 if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL)
2589 return EINVAL;
2590
2591 PMCDBG(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm);
2592
2593 *pmc = pm;
2594 return 0;
2595}
2596
2597/*
2598 * Start a PMC.
2599 */
2600
2601static int
2602pmc_start(struct pmc *pm)
2603{
2604 enum pmc_mode mode;
2605 struct pmc_owner *po;
2606 struct pmc_binding pb;
2607 struct pmc_classdep *pcd;
2608 int adjri, error, cpu, ri;
2609
2610 KASSERT(pm != NULL,
2611 ("[pmc,%d] null pm", __LINE__));
2612
2613 mode = PMC_TO_MODE(pm);
2614 ri = PMC_TO_ROWINDEX(pm);
2615 pcd = pmc_ri_to_classdep(md, ri, &adjri);
2616
2617 error = 0;
2618
2619 PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri);
2620
2621 po = pm->pm_owner;
2622
2623 /*
2624 * Disallow PMCSTART if a logfile is required but has not been
2625 * configured yet.
2626 */
2627 if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
2628 (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
2629 return (EDOOFUS); /* programming error */
2630
2631 /*
2632 * If this is a sampling mode PMC, log mapping information for
2633 * the kernel modules that are currently loaded.
2634 */
2635 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
2636 pmc_log_kernel_mappings(pm);
2637
2638 if (PMC_IS_VIRTUAL_MODE(mode)) {
2639
2640 /*
2641 * If a PMCATTACH has never been done on this PMC,
2642 * attach it to its owner process.
2643 */
2644
2645 if (LIST_EMPTY(&pm->pm_targets))
2646 error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH :
2647 pmc_attach_process(po->po_owner, pm);
2648
2649 /*
2650 * If the PMC is attached to its owner, then force a context
2651 * switch to ensure that the MD state gets set correctly.
2652 */
2653
2654 if (error == 0) {
2655 pm->pm_state = PMC_STATE_RUNNING;
2656 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER)
2657 pmc_force_context_switch();
2658 }
2659
2660 return (error);
2661 }
2662
2663
2664 /*
2665 * A system-wide PMC.
2666 *
2667 * Add the owner to the global list if this is a system-wide
2668 * sampling PMC.
2669 */
2670
2671 if (mode == PMC_MODE_SS) {
2672 if (po->po_sscount == 0) {
2673 LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
2674 atomic_add_rel_int(&pmc_ss_count, 1);
2675 PMCDBG(PMC,OPS,1, "po=%p in global list", po);
2676 }
2677 po->po_sscount++;
2678
2679 /*
2680 * Log mapping information for all existing processes in the
2681 * system. Subsequent mappings are logged as they happen;
2682 * see pmc_process_mmap().
2683 */
2684 if (po->po_logprocmaps == 0) {
2685 pmc_log_all_process_mappings(po);
2686 po->po_logprocmaps = 1;
2687 }
2688 }
2689
2690 /*
2691 * Move to the CPU associated with this
2692 * PMC, and start the hardware.
2693 */
2694
2695 pmc_save_cpu_binding(&pb);
2696
2697 cpu = PMC_TO_CPU(pm);
2698
2699 if (!pmc_cpu_is_active(cpu))
2700 return (ENXIO);
2701
2702 pmc_select_cpu(cpu);
2703
2704 /*
2705 * global PMCs are configured at allocation time
2706 * so write out the initial value and start the PMC.
2707 */
2708
2709 pm->pm_state = PMC_STATE_RUNNING;
2710
2711 critical_enter();
2712 if ((error = pcd->pcd_write_pmc(cpu, adjri,
2713 PMC_IS_SAMPLING_MODE(mode) ?
2714 pm->pm_sc.pm_reloadcount :
2715 pm->pm_sc.pm_initial)) == 0)
2716 error = pcd->pcd_start_pmc(cpu, adjri);
2717 critical_exit();
2718
2719 pmc_restore_cpu_binding(&pb);
2720
2721 return (error);
2722}
2723
2724/*
2725 * Stop a PMC.
2726 */
2727
2728static int
2729pmc_stop(struct pmc *pm)
2730{
2731 struct pmc_owner *po;
2732 struct pmc_binding pb;
2733 struct pmc_classdep *pcd;
2734 int adjri, cpu, error, ri;
2735
2736 KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__));
2737
2738 PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm,
2739 PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm));
2740
2741 pm->pm_state = PMC_STATE_STOPPED;
2742
2743 /*
2744 * If the PMC is a virtual mode one, changing the state to
2745 * non-RUNNING is enough to ensure that the PMC never gets
2746 * scheduled.
2747 *
2748	 * If this PMC is currently running on a CPU, then it will be
2749	 * handled correctly at the time its target process is context
2750	 * switched out.
2751 */
2752
2753 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
2754 return 0;
2755
2756 /*
2757 * A system-mode PMC. Move to the CPU associated with
2758 * this PMC, and stop the hardware. We update the
2759 * 'initial count' so that a subsequent PMCSTART will
2760 * resume counting from the current hardware count.
2761 */
2762
2763 pmc_save_cpu_binding(&pb);
2764
2765 cpu = PMC_TO_CPU(pm);
2766
2767 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
2768 ("[pmc,%d] illegal cpu=%d", __LINE__, cpu));
2769
2770 if (!pmc_cpu_is_active(cpu))
2771 return ENXIO;
2772
2773 pmc_select_cpu(cpu);
2774
2775 ri = PMC_TO_ROWINDEX(pm);
2776 pcd = pmc_ri_to_classdep(md, ri, &adjri);
2777
2778 critical_enter();
2779 if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0)
2780 error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial);
2781 critical_exit();
2782
2783 pmc_restore_cpu_binding(&pb);
2784
2785 po = pm->pm_owner;
2786
2787 /* remove this owner from the global list of SS PMC owners */
2788 if (PMC_TO_MODE(pm) == PMC_MODE_SS) {
2789 po->po_sscount--;
2790 if (po->po_sscount == 0) {
2791 atomic_subtract_rel_int(&pmc_ss_count, 1);
2792 LIST_REMOVE(po, po_ssnext);
2793 PMCDBG(PMC,OPS,2,"po=%p removed from global list", po);
2794 }
2795 }
2796
2797 return (error);
2798}
2799
2800
2801#ifdef DEBUG
2802static const char *pmc_op_to_name[] = {
2803#undef __PMC_OP
2804#define __PMC_OP(N, D) #N ,
2805 __PMC_OPS()
2806 NULL
2807};
2808#endif
2809
2810/*
2811 * The syscall interface
2812 */
2813
2814#define PMC_GET_SX_XLOCK(...) do { \
2815 sx_xlock(&pmc_sx); \
2816 if (pmc_hook == NULL) { \
2817 sx_xunlock(&pmc_sx); \
2818 return __VA_ARGS__; \
2819 } \
2820} while (0)
2821
2822#define PMC_DOWNGRADE_SX() do { \
2823 sx_downgrade(&pmc_sx); \
2824 is_sx_downgraded = 1; \
2825} while (0)
2826
2827static int
2828pmc_syscall_handler(struct thread *td, void *syscall_args)
2829{
2830 int error, is_sx_downgraded, is_sx_locked, op;
2831 struct pmc_syscall_args *c;
2832 void *arg;
2833
2834 PMC_GET_SX_XLOCK(ENOSYS);
2835
2836 DROP_GIANT();
2837
2838 is_sx_downgraded = 0;
2839 is_sx_locked = 1;
2840
2841 c = (struct pmc_syscall_args *) syscall_args;
2842
2843 op = c->pmop_code;
2844 arg = c->pmop_data;
2845
2846 PMCDBG(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op,
2847 pmc_op_to_name[op], arg);
2848
2849 error = 0;
2850 atomic_add_int(&pmc_stats.pm_syscalls, 1);
2851
2852 switch(op)
2853 {
2854
2855
2856 /*
2857 * Configure a log file.
2858 *
2859 * XXX This OP will be reworked.
2860 */
2861
2862 case PMC_OP_CONFIGURELOG:
2863 {
2864 struct proc *p;
2865 struct pmc *pm;
2866 struct pmc_owner *po;
2867 struct pmc_op_configurelog cl;
2868
2869 sx_assert(&pmc_sx, SX_XLOCKED);
2870
2871 if ((error = copyin(arg, &cl, sizeof(cl))) != 0)
2872 break;
2873
2874 /* mark this process as owning a log file */
2875 p = td->td_proc;
2876 if ((po = pmc_find_owner_descriptor(p)) == NULL)
2877 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) {
2878 error = ENOMEM;
2879 break;
2880 }
2881
2882 /*
2883 * If a valid fd was passed in, try to configure that,
2884 * otherwise if 'fd' was less than zero and there was
2885 * a log file configured, flush its buffers and
2886 * de-configure it.
2887 */
2888 if (cl.pm_logfd >= 0) {
2889 sx_xunlock(&pmc_sx);
2890 is_sx_locked = 0;
2891 error = pmclog_configure_log(md, po, cl.pm_logfd);
2892 } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) {
2893 pmclog_process_closelog(po);
34
35#include <sys/param.h>
36#include <sys/eventhandler.h>
37#include <sys/jail.h>
38#include <sys/kernel.h>
39#include <sys/kthread.h>
40#include <sys/limits.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/module.h>
44#include <sys/mount.h>
45#include <sys/mutex.h>
46#include <sys/pmc.h>
47#include <sys/pmckern.h>
48#include <sys/pmclog.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/queue.h>
52#include <sys/resourcevar.h>
53#include <sys/sched.h>
54#include <sys/signalvar.h>
55#include <sys/smp.h>
56#include <sys/sx.h>
57#include <sys/sysctl.h>
58#include <sys/sysent.h>
59#include <sys/systm.h>
60#include <sys/vnode.h>
61
62#include <sys/linker.h> /* needs to be after <sys/malloc.h> */
63
64#include <machine/atomic.h>
65#include <machine/md_var.h>
66
67#include <vm/vm.h>
68#include <vm/vm_extern.h>
69#include <vm/pmap.h>
70#include <vm/vm_map.h>
71#include <vm/vm_object.h>
72
73/*
74 * Types
75 */
76
77enum pmc_flags {
78 PMC_FLAG_NONE = 0x00, /* do nothing */
79 PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */
80 PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */
81};
82
83/*
84 * The offset in sysent where the syscall is allocated.
85 */
86
87static int pmc_syscall_num = NO_SYSCALL;
88struct pmc_cpu **pmc_pcpu; /* per-cpu state */
89pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */
90
91#define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)]
92
93struct mtx_pool *pmc_mtxpool;
94static int *pmc_pmcdisp; /* PMC row dispositions */
95
96#define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0)
97#define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0)
98#define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0)
99
100#define PMC_MARK_ROW_FREE(R) do { \
101 pmc_pmcdisp[(R)] = 0; \
102} while (0)
103
104#define PMC_MARK_ROW_STANDALONE(R) do { \
105 KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
106 __LINE__)); \
107 atomic_add_int(&pmc_pmcdisp[(R)], -1); \
108 KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \
109 ("[pmc,%d] row disposition error", __LINE__)); \
110} while (0)
111
112#define PMC_UNMARK_ROW_STANDALONE(R) do { \
113 atomic_add_int(&pmc_pmcdisp[(R)], 1); \
114 KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
115 __LINE__)); \
116} while (0)
117
118#define PMC_MARK_ROW_THREAD(R) do { \
119 KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
120 __LINE__)); \
121 atomic_add_int(&pmc_pmcdisp[(R)], 1); \
122} while (0)
123
124#define PMC_UNMARK_ROW_THREAD(R) do { \
125 atomic_add_int(&pmc_pmcdisp[(R)], -1); \
126 KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
127 __LINE__)); \
128} while (0)
129
130
131/* various event handlers */
132static eventhandler_tag pmc_exit_tag, pmc_fork_tag;
133
134/* Module statistics */
135struct pmc_op_getdriverstats pmc_stats;
136
137/* Machine/processor dependent operations */
138static struct pmc_mdep *md;
139
140/*
141 * Hash tables mapping owner processes and target threads to PMCs.
142 */
143
144struct mtx pmc_processhash_mtx; /* spin mutex */
145static u_long pmc_processhashmask;
146static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash;
147
148/*
149 * Hash table of PMC owner descriptors. This table is protected by
150 * the shared PMC "sx" lock.
151 */
152
153static u_long pmc_ownerhashmask;
154static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash;
155
156/*
157 * List of PMC owners with system-wide sampling PMCs.
158 */
159
160static LIST_HEAD(, pmc_owner) pmc_ss_owners;
161
162
163/*
164 * A map of row indices to classdep structures.
165 */
166static struct pmc_classdep **pmc_rowindex_to_classdep;
167
168/*
169 * Prototypes
170 */
171
172#ifdef DEBUG
173static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS);
174static int pmc_debugflags_parse(char *newstr, char *fence);
175#endif
176
177static int load(struct module *module, int cmd, void *arg);
178static int pmc_attach_process(struct proc *p, struct pmc *pm);
179static struct pmc *pmc_allocate_pmc_descriptor(void);
180static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p);
181static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
182static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
183 int cpu);
184static int pmc_can_attach(struct pmc *pm, struct proc *p);
185static void pmc_capture_user_callchain(int cpu, struct trapframe *tf);
186static void pmc_cleanup(void);
187static int pmc_detach_process(struct proc *p, struct pmc *pm);
188static int pmc_detach_one_process(struct proc *p, struct pmc *pm,
189 int flags);
190static void pmc_destroy_owner_descriptor(struct pmc_owner *po);
191static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
192static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
193static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
194 pmc_id_t pmc);
195static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
196 uint32_t mode);
197static void pmc_force_context_switch(void);
198static void pmc_link_target_process(struct pmc *pm,
199 struct pmc_process *pp);
200static void pmc_log_all_process_mappings(struct pmc_owner *po);
201static void pmc_log_kernel_mappings(struct pmc *pm);
202static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p);
203static void pmc_maybe_remove_owner(struct pmc_owner *po);
204static void pmc_process_csw_in(struct thread *td);
205static void pmc_process_csw_out(struct thread *td);
206static void pmc_process_exit(void *arg, struct proc *p);
207static void pmc_process_fork(void *arg, struct proc *p1,
208 struct proc *p2, int n);
209static void pmc_process_samples(int cpu);
210static void pmc_release_pmc_descriptor(struct pmc *pmc);
211static void pmc_remove_owner(struct pmc_owner *po);
212static void pmc_remove_process_descriptor(struct pmc_process *pp);
213static void pmc_restore_cpu_binding(struct pmc_binding *pb);
214static void pmc_save_cpu_binding(struct pmc_binding *pb);
215static void pmc_select_cpu(int cpu);
216static int pmc_start(struct pmc *pm);
217static int pmc_stop(struct pmc *pm);
218static int pmc_syscall_handler(struct thread *td, void *syscall_args);
219static void pmc_unlink_target_process(struct pmc *pmc,
220 struct pmc_process *pp);
221
222/*
223 * Kernel tunables and sysctl(8) interface.
224 */
225
226SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");
227
228static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
229TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "callchaindepth", &pmc_callchaindepth);
230SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_TUN|CTLFLAG_RD,
231 &pmc_callchaindepth, 0, "depth of call chain records");
232
233#ifdef DEBUG
234struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
235char pmc_debugstr[PMC_DEBUG_STRSIZE];
236TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr,
237 sizeof(pmc_debugstr));
238SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
239 CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_TUN,
240 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags");
241#endif
242
243/*
244 * kern.hwpmc.hashsize -- determines the number of rows in the hash
245 * tables used to look up target processes and owners
246 */
247
248static int pmc_hashsize = PMC_HASH_SIZE;
249TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "hashsize", &pmc_hashsize);
250SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD,
251 &pmc_hashsize, 0, "rows in hash tables");
252
253/*
254 * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU
255 */
256
257static int pmc_nsamples = PMC_NSAMPLES;
258TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples);
259SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD,
260 &pmc_nsamples, 0, "number of PC samples per CPU");
261
262
263/*
264 * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
265 */
266
267static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE;
268TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "mtxpoolsize", &pmc_mtxpool_size);
269SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_TUN|CTLFLAG_RD,
270 &pmc_mtxpool_size, 0, "size of spin mutex pool");
271
272
273/*
274 * security.bsd.unprivileged_syspmcs -- allow non-root processes to
275 * allocate system-wide PMCs.
276 *
277 * Allowing unprivileged processes to allocate system PMCs is convenient
278 * if system-wide measurements need to be taken concurrently with other
279 * per-process measurements. This feature is turned off by default.
280 */
281
282static int pmc_unprivileged_syspmcs = 0;
283TUNABLE_INT("security.bsd.unprivileged_syspmcs", &pmc_unprivileged_syspmcs);
284SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RW,
285 &pmc_unprivileged_syspmcs, 0,
286 "allow unprivileged process to allocate system PMCs");
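/*
 * The TUNABLE_INT()s above are boot-time loader tunables.  Assuming
 * PMC_SYSCTL_NAME_PREFIX expands to "kern.hwpmc." (matching the sysctl
 * node declared earlier), they can be set from loader.conf(5); the values
 * below are illustrative only:
 *
 *	kern.hwpmc.callchaindepth="16"
 *	kern.hwpmc.hashsize="256"
 *	kern.hwpmc.nsamples="1024"
 *	kern.hwpmc.mtxpoolsize="2048"
 *	security.bsd.unprivileged_syspmcs="1"
 */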
287
288/*
289 * Hash function. Discard the lower 2 bits of the pointer since
290 * these are always zero for our uses. The hash multiplier is
291 * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
292 */
293
294#if LONG_BIT == 64
295#define _PMC_HM 11400714819323198486u
296#elif LONG_BIT == 32
297#define _PMC_HM 2654435769u
298#else
299#error Must know the size of 'long' to compile
300#endif
301
302#define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M))
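/*
 * A small user-space sketch of the multiplicative (Fibonacci) hash defined
 * above, not driver code, and assuming a 64-bit 'long' as on amd64.  The
 * multiplier is the golden-ratio fraction of the word size,
 * round(2^64 * (sqrt(5)-1)/2) = 11400714819323198486; multiplying the
 * shifted pointer value by it spreads pointer values across the table, and
 * the mask keeps the low-order bits of the product as the bucket index.
 * The table size and sample addresses below are made up for illustration.
 */
#include <stdio.h>

#define HM		11400714819323198486UL	/* 64-bit golden-ratio multiplier */
#define HASH_PTR(P, M)	((((unsigned long)(P) >> 2) * HM) & (M))

int
main(void)
{
	unsigned long mask = 127;	/* a 128-bucket table */
	unsigned long addrs[] = { 0xfffff80012345000UL, 0xfffff80012345040UL };
	unsigned int i;

	for (i = 0; i < 2; i++)
		printf("addr %#lx -> bucket %lu\n", addrs[i],
		    HASH_PTR(addrs[i], mask));
	return (0);
}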
303
304/*
305 * Syscall structures
306 */
307
308/* The `sysent' for the new syscall */
309static struct sysent pmc_sysent = {
310 2, /* sy_narg */
311 pmc_syscall_handler /* sy_call */
312};
313
314static struct syscall_module_data pmc_syscall_mod = {
315 load,
316 NULL,
317 &pmc_syscall_num,
318 &pmc_sysent,
319 { 0, NULL }
320};
321
322static moduledata_t pmc_mod = {
323 PMC_MODULE_NAME,
324 syscall_module_handler,
325 &pmc_syscall_mod
326};
327
328DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
329MODULE_VERSION(pmc, PMC_VERSION);
330
331#ifdef DEBUG
332enum pmc_dbgparse_state {
333 PMCDS_WS, /* in whitespace */
334 PMCDS_MAJOR, /* seen a major keyword */
335 PMCDS_MINOR
336};
337
338static int
339pmc_debugflags_parse(char *newstr, char *fence)
340{
341 char c, *p, *q;
342 struct pmc_debugflags *tmpflags;
343 int error, found, *newbits, tmp;
344 size_t kwlen;
345
346 tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO);
347
348 p = newstr;
349 error = 0;
350
351 for (; p < fence && (c = *p); p++) {
352
353 /* skip white space */
354 if (c == ' ' || c == '\t')
355 continue;
356
357 /* look for a keyword followed by "=" */
358 for (q = p; p < fence && (c = *p) && c != '='; p++)
359 ;
360 if (c != '=') {
361 error = EINVAL;
362 goto done;
363 }
364
365 kwlen = p - q;
366 newbits = NULL;
367
368 /* lookup flag group name */
369#define DBG_SET_FLAG_MAJ(S,F) \
370 if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \
371 newbits = &tmpflags->pdb_ ## F;
372
373 DBG_SET_FLAG_MAJ("cpu", CPU);
374 DBG_SET_FLAG_MAJ("csw", CSW);
375 DBG_SET_FLAG_MAJ("logging", LOG);
376 DBG_SET_FLAG_MAJ("module", MOD);
377 DBG_SET_FLAG_MAJ("md", MDP);
378 DBG_SET_FLAG_MAJ("owner", OWN);
379 DBG_SET_FLAG_MAJ("pmc", PMC);
380 DBG_SET_FLAG_MAJ("process", PRC);
381 DBG_SET_FLAG_MAJ("sampling", SAM);
382
383 if (newbits == NULL) {
384 error = EINVAL;
385 goto done;
386 }
387
388 p++; /* skip the '=' */
389
390 /* Now parse the individual flags */
391 tmp = 0;
392 newflag:
393 for (q = p; p < fence && (c = *p); p++)
394 if (c == ' ' || c == '\t' || c == ',')
395 break;
396
397 /* p == fence or c == ws or c == "," or c == 0 */
398
399 if ((kwlen = p - q) == 0) {
400 *newbits = tmp;
401 continue;
402 }
403
404 found = 0;
405#define DBG_SET_FLAG_MIN(S,F) \
406 if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \
407 tmp |= found = (1 << PMC_DEBUG_MIN_ ## F)
408
409 /* a '*' denotes all possible flags in the group */
410 if (kwlen == 1 && *q == '*')
411 tmp = found = ~0;
412 /* look for individual flag names */
413 DBG_SET_FLAG_MIN("allocaterow", ALR);
414 DBG_SET_FLAG_MIN("allocate", ALL);
415 DBG_SET_FLAG_MIN("attach", ATT);
416 DBG_SET_FLAG_MIN("bind", BND);
417 DBG_SET_FLAG_MIN("config", CFG);
418 DBG_SET_FLAG_MIN("exec", EXC);
419 DBG_SET_FLAG_MIN("exit", EXT);
420 DBG_SET_FLAG_MIN("find", FND);
421 DBG_SET_FLAG_MIN("flush", FLS);
422 DBG_SET_FLAG_MIN("fork", FRK);
423 DBG_SET_FLAG_MIN("getbuf", GTB);
424 DBG_SET_FLAG_MIN("hook", PMH);
425 DBG_SET_FLAG_MIN("init", INI);
426 DBG_SET_FLAG_MIN("intr", INT);
427 DBG_SET_FLAG_MIN("linktarget", TLK);
428 DBG_SET_FLAG_MIN("mayberemove", OMR);
429 DBG_SET_FLAG_MIN("ops", OPS);
430 DBG_SET_FLAG_MIN("read", REA);
431 DBG_SET_FLAG_MIN("register", REG);
432 DBG_SET_FLAG_MIN("release", REL);
433 DBG_SET_FLAG_MIN("remove", ORM);
434 DBG_SET_FLAG_MIN("sample", SAM);
435 DBG_SET_FLAG_MIN("scheduleio", SIO);
436 DBG_SET_FLAG_MIN("select", SEL);
437 DBG_SET_FLAG_MIN("signal", SIG);
438 DBG_SET_FLAG_MIN("swi", SWI);
439 DBG_SET_FLAG_MIN("swo", SWO);
440 DBG_SET_FLAG_MIN("start", STA);
441 DBG_SET_FLAG_MIN("stop", STO);
442 DBG_SET_FLAG_MIN("syscall", PMS);
443 DBG_SET_FLAG_MIN("unlinktarget", TUL);
444 DBG_SET_FLAG_MIN("write", WRI);
445 if (found == 0) {
446 /* unrecognized flag name */
447 error = EINVAL;
448 goto done;
449 }
450
451 if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */
452 *newbits = tmp;
453 continue;
454 }
455
456 p++;
457 goto newflag;
458 }
459
460 /* save the new flag set */
461 bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags));
462
463 done:
464 free(tmpflags, M_PMC);
465 return error;
466}
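/*
 * An illustrative use of the parser above (not part of the driver): the
 * debug-flag string is a whitespace-separated list of
 * "<group>=<flag>[,<flag>...]" items, with '*' selecting every flag in a
 * group.  For example, a string such as
 *
 *	"process=exec,fork,exit cpu=* sampling=*"
 *
 * could be handed to the parser the same way the sysctl handler below does,
 * with 'fence' pointing just past the string's terminating NUL:
 *
 *	char buf[] = "process=exec,fork,exit cpu=* sampling=*";
 *	int error = pmc_debugflags_parse(buf, buf + sizeof(buf));
 *
 * The example string and flag selection are hypothetical.
 */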
467
468static int
469pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS)
470{
471 char *fence, *newstr;
472 int error;
473 unsigned int n;
474
475 (void) arg1; (void) arg2; /* unused parameters */
476
477 n = sizeof(pmc_debugstr);
478 newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO);
479 (void) strlcpy(newstr, pmc_debugstr, n);
480
481 error = sysctl_handle_string(oidp, newstr, n, req);
482
483 /* if there is a new string, parse and copy it */
484 if (error == 0 && req->newptr != NULL) {
485 fence = newstr + (n < req->newlen ? n : req->newlen + 1);
486 if ((error = pmc_debugflags_parse(newstr, fence)) == 0)
487 (void) strlcpy(pmc_debugstr, newstr,
488 sizeof(pmc_debugstr));
489 }
490
491 free(newstr, M_PMC);
492
493 return error;
494}
495#endif
496
497/*
498 * Map a row index to a classdep structure and return the adjusted row
499 * index for the PMC class index.
500 */
501static struct pmc_classdep *
502pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri)
503{
504 struct pmc_classdep *pcd;
505
506 (void) md;
507
508 KASSERT(ri >= 0 && ri < md->pmd_npmc,
509 ("[pmc,%d] illegal row-index %d", __LINE__, ri));
510
511 pcd = pmc_rowindex_to_classdep[ri];
512
513 KASSERT(pcd != NULL,
514 ("[pmc,%d] ri %d null pcd", __LINE__, ri));
515
516 *adjri = ri - pcd->pcd_ri;
517
518 KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num,
519 ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri));
520
521 return (pcd);
522}
523
524/*
525 * Concurrency Control
526 *
527 * The driver manages the following data structures:
528 *
529 * - target process descriptors, one per target process
530 * - owner process descriptors (and attached lists), one per owner process
531 * - lookup hash tables for owner and target processes
532 * - PMC descriptors (and attached lists)
533 * - per-cpu hardware state
534 * - the 'hook' variable through which the kernel calls into
535 * this module
536 * - the machine hardware state (managed by the MD layer)
537 *
538 * These data structures are accessed from:
539 *
540 * - thread context-switch code
541 * - interrupt handlers (possibly on multiple cpus)
542 * - kernel threads on multiple cpus running on behalf of user
543 * processes doing system calls
544 * - this driver's private kernel threads
545 *
546 * = Locks and Locking strategy =
547 *
548 * The driver uses four locking strategies for its operation:
549 *
550 * - The global SX lock "pmc_sx" is used to protect internal
551 * data structures.
552 *
553 * Calls into the module by syscall() start with this lock being
554 * held in exclusive mode. Depending on the requested operation,
555 * the lock may be downgraded to 'shared' mode to allow more
556 * concurrent readers into the module. Calls into the module from
557 * other parts of the kernel acquire the lock in shared mode.
558 *
559 * This SX lock is held in exclusive mode for any operations that
560 * modify the linkages between the driver's internal data structures.
561 *
562 * The 'pmc_hook' function pointer is also protected by this lock.
563 * It is only examined with the sx lock held in exclusive mode. The
564 * kernel module is allowed to be unloaded only with the sx lock held
565 * in exclusive mode. In normal syscall handling, after acquiring the
566 * pmc_sx lock we first check that 'pmc_hook' is non-null before
567 * proceeding. This prevents races between the thread unloading the module
568 * and other threads seeking to use the module.
569 *
570 * - Lookups of target process structures cannot use the global
571 *   "pmc_sx" SX lock because these lookups need to happen during
572 *   context switches and in other critical sections where sleeping
573 *   is not allowed.  This lookup table is therefore protected by its
574 *   own private spin-mutex, "pmc_processhash_mtx".  The owner hash
575 *   table, in contrast, is protected by the shared "pmc_sx" lock.
576 *
577 * - Interrupt handlers work in a lock free manner. At interrupt
578 * time, handlers look at the PMC pointer (phw->phw_pmc) configured
579 * when the PMC was started. If this pointer is NULL, the interrupt
580 * is ignored after updating driver statistics. We ensure that this
581 * pointer is set (using an atomic operation if necessary) before the
582 * PMC hardware is started. Conversely, this pointer is unset atomically
583 * only after the PMC hardware is stopped.
584 *
585 * We ensure that everything needed for the operation of an
586 * interrupt handler is available without it needing to acquire any
587 * locks. We also ensure that a PMC's software state is destroyed only
588 * after the PMC is taken off hardware (on all CPUs).
589 *
590 * - Context-switch handling with process-private PMCs needs more
591 * care.
592 *
593 * A given process may be the target of multiple PMCs. For example,
594 * PMCATTACH and PMCDETACH may be requested by a process on one CPU
595 * while the target process is running on another. A PMC could also
596 * be getting released because its owner is exiting. We tackle
597 * these situations in the following manner:
598 *
599 * - each target process structure 'pmc_process' has an array
600 * of 'struct pmc *' pointers, one for each hardware PMC.
601 *
602 * - At context switch IN time, each "target" PMC in RUNNING state
603 * gets started on hardware and a pointer to each PMC is copied into
604 * the per-cpu phw array. The 'runcount' for the PMC is
605 * incremented.
606 *
607 * - At context switch OUT time, all process-virtual PMCs are stopped
608 *   on hardware. The saved value is added to the PMC's value field
609 *   only if the PMC is in a non-deleted state (the PMC's state could
610 * have changed during the current time slice).
611 *
612 * Note that since in-between a switch IN on a processor and a switch
613 * OUT, the PMC could have been released on another CPU. Therefore
614 * context switch OUT always looks at the hardware state to turn
615 * OFF PMCs and will update a PMC's saved value only if reachable
616 * from the target process record.
617 *
618 * - OP PMCRELEASE could be called on a PMC at any time (the PMC could
619 * be attached to many processes at the time of the call and could
620 * be active on multiple CPUs).
621 *
622 * We prevent further scheduling of the PMC by marking it as in
623 * state 'DELETED'. If the runcount of the PMC is non-zero then
624 * this PMC is currently running on a CPU somewhere. The thread
625 * doing the PMCRELEASE operation waits by repeatedly doing a
626 * pause() till the runcount comes to zero.
627 *
628 * The contents of a PMC descriptor (struct pmc) are protected using
629 * a spin-mutex. In order to save space, we use a mutex pool.
630 *
631 * In terms of lock types used by witness(4), we use:
632 * - Type "pmc-sx", used by the global SX lock.
633 * - Type "pmc-sleep", for sleep mutexes used by logger threads.
634 * - Type "pmc-per-proc", for protecting PMC owner descriptors.
635 * - Type "pmc-leaf", used for all other spin mutexes.
636 */
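/*
 * A condensed sketch of the syscall-path locking pattern described above;
 * it is illustrative only, not driver code, and pmc_example_op() is a
 * hypothetical function.  The names pmc_sx, pmc_hook, PMC_FLAG_NONE and
 * pmc_find_process_descriptor() are the driver's own.
 */
#if 0	/* illustrative only */
static int
pmc_example_op(struct proc *p)
{
	struct pmc_process *pp;

	sx_xlock(&pmc_sx);
	if (pmc_hook == NULL) {		/* module is being unloaded */
		sx_xunlock(&pmc_sx);
		return (ENOSYS);
	}
	sx_downgrade(&pmc_sx);		/* read-mostly work from here on */

	/*
	 * Target-process lookups never rely on the sx lock; the helper
	 * below takes the "pmc_processhash_mtx" spin mutex internally.
	 */
	pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE);

	sx_sunlock(&pmc_sx);
	return (pp != NULL ? 0 : ESRCH);
}
#endif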
637
638/*
639 * save the cpu binding of the current kthread
640 */
641
642static void
643pmc_save_cpu_binding(struct pmc_binding *pb)
644{
645 PMCDBG(CPU,BND,2, "%s", "save-cpu");
646 thread_lock(curthread);
647 pb->pb_bound = sched_is_bound(curthread);
648 pb->pb_cpu = curthread->td_oncpu;
649 thread_unlock(curthread);
650 PMCDBG(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu);
651}
652
653/*
654 * restore the cpu binding of the current thread
655 */
656
657static void
658pmc_restore_cpu_binding(struct pmc_binding *pb)
659{
660 PMCDBG(CPU,BND,2, "restore-cpu curcpu=%d restore=%d",
661 curthread->td_oncpu, pb->pb_cpu);
662 thread_lock(curthread);
663 if (pb->pb_bound)
664 sched_bind(curthread, pb->pb_cpu);
665 else
666 sched_unbind(curthread);
667 thread_unlock(curthread);
668 PMCDBG(CPU,BND,2, "%s", "restore-cpu done");
669}
670
671/*
672 * move execution to the specified cpu and bind it there.
673 */
674
675static void
676pmc_select_cpu(int cpu)
677{
678 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
679 ("[pmc,%d] bad cpu number %d", __LINE__, cpu));
680
681 /* Never move to an inactive CPU. */
682 KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive "
683 "CPU %d", __LINE__, cpu));
684
685 PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu);
686 thread_lock(curthread);
687 sched_bind(curthread, cpu);
688 thread_unlock(curthread);
689
690 KASSERT(curthread->td_oncpu == cpu,
691 ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__,
692 cpu, curthread->td_oncpu));
693
694 PMCDBG(CPU,SEL,2, "select-cpu cpu=%d ok", cpu);
695}
696
697/*
698 * Force a context switch.
699 *
700 * We do this by pause'ing for 1 tick -- invoking mi_switch() is not
701 * guaranteed to force a context switch.
702 */
703
704static void
705pmc_force_context_switch(void)
706{
707
708 pause("pmcctx", 1);
709}
710
711/*
712 * Get the file name for an executable. This is a simple wrapper
713 * around vn_fullpath(9).
714 */
715
716static void
717pmc_getfilename(struct vnode *v, char **fullpath, char **freepath)
718{
719
720 *fullpath = "unknown";
721 *freepath = NULL;
722 vn_fullpath(curthread, v, fullpath, freepath);
723}
724
725/*
726 * remove a process owning PMCs
727 */
728
729void
730pmc_remove_owner(struct pmc_owner *po)
731{
732 struct pmc *pm, *tmp;
733
734 sx_assert(&pmc_sx, SX_XLOCKED);
735
736 PMCDBG(OWN,ORM,1, "remove-owner po=%p", po);
737
738 /* Remove descriptor from the owner hash table */
739 LIST_REMOVE(po, po_next);
740
741 /* release all owned PMC descriptors */
742 LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) {
743 PMCDBG(OWN,ORM,2, "pmc=%p", pm);
744 KASSERT(pm->pm_owner == po,
745 ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po));
746
747 pmc_release_pmc_descriptor(pm); /* will unlink from the list */
748 }
749
750 KASSERT(po->po_sscount == 0,
751 ("[pmc,%d] SS count not zero", __LINE__));
752 KASSERT(LIST_EMPTY(&po->po_pmcs),
753 ("[pmc,%d] PMC list not empty", __LINE__));
754
755 /* de-configure the log file if present */
756 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
757 pmclog_deconfigure_log(po);
758}
759
760/*
761 * remove an owner process record if all conditions are met.
762 */
763
764static void
765pmc_maybe_remove_owner(struct pmc_owner *po)
766{
767
768 PMCDBG(OWN,OMR,1, "maybe-remove-owner po=%p", po);
769
770 /*
771 * Remove owner record if
772 * - this process does not own any PMCs
773 * - this process has not configured a log file (PMC_PO_OWNS_LOGFILE unset)
774 */
775
776 if (LIST_EMPTY(&po->po_pmcs) &&
777 ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) {
778 pmc_remove_owner(po);
779 pmc_destroy_owner_descriptor(po);
780 }
781}
782
783/*
784 * Add an association between a target process and a PMC.
785 */
786
787static void
788pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
789{
790 int ri;
791 struct pmc_target *pt;
792
793 sx_assert(&pmc_sx, SX_XLOCKED);
794
795 KASSERT(pm != NULL && pp != NULL,
796 ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));
797 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
798 ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d",
799 __LINE__, pm, pp->pp_proc->p_pid));
800 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1),
801 ("[pmc,%d] Illegal reference count %d for process record %p",
802 __LINE__, pp->pp_refcnt, (void *) pp));
803
804 ri = PMC_TO_ROWINDEX(pm);
805
806 PMCDBG(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p",
807 pm, ri, pp);
808
809#ifdef DEBUG
810 LIST_FOREACH(pt, &pm->pm_targets, pt_next)
811 if (pt->pt_process == pp)
812 KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets",
813 __LINE__, pp, pm));
814#endif
815
816 pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO);
817 pt->pt_process = pp;
818
819 LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next);
820
821 atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc,
822 (uintptr_t)pm);
823
824 if (pm->pm_owner->po_owner == pp->pp_proc)
825 pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER;
826
827 /*
828 * Initialize the per-process values at this row index.
829 */
830 pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ?
831 pm->pm_sc.pm_reloadcount : 0;
832
833 pp->pp_refcnt++;
834
835}
836
837/*
838 * Removes the association between a target process and a PMC.
839 */
840
841static void
842pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
843{
844 int ri;
845 struct proc *p;
846 struct pmc_target *ptgt;
847
848 sx_assert(&pmc_sx, SX_XLOCKED);
849
850 KASSERT(pm != NULL && pp != NULL,
851 ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));
852
853 KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc,
854 ("[pmc,%d] Illegal ref count %d on process record %p",
855 __LINE__, pp->pp_refcnt, (void *) pp));
856
857 ri = PMC_TO_ROWINDEX(pm);
858
859 PMCDBG(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p",
860 pm, ri, pp);
861
862 KASSERT(pp->pp_pmcs[ri].pp_pmc == pm,
863 ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__,
864 ri, pm, pp->pp_pmcs[ri].pp_pmc));
865
866 pp->pp_pmcs[ri].pp_pmc = NULL;
867 pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;
868
869 /* Remove owner-specific flags */
870 if (pm->pm_owner->po_owner == pp->pp_proc) {
871 pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
872 pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER;
873 }
874
875 pp->pp_refcnt--;
876
877 /* Remove the target process from the PMC structure */
878 LIST_FOREACH(ptgt, &pm->pm_targets, pt_next)
879 if (ptgt->pt_process == pp)
880 break;
881
882 KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found "
883 "in pmc %p", __LINE__, pp->pp_proc, pp, pm));
884
885 LIST_REMOVE(ptgt, pt_next);
886 free(ptgt, M_PMC);
887
888 /* if the PMC now lacks targets, send the owner a SIGIO */
889 if (LIST_EMPTY(&pm->pm_targets)) {
890 p = pm->pm_owner->po_owner;
891 PROC_LOCK(p);
892 kern_psignal(p, SIGIO);
893 PROC_UNLOCK(p);
894
895 PMCDBG(PRC,SIG,2, "signalling proc=%p signal=%d", p,
896 SIGIO);
897 }
898}
899
900/*
901 * Check if PMC 'pm' may be attached to target process 't'.
902 */
903
904static int
905pmc_can_attach(struct pmc *pm, struct proc *t)
906{
907 struct proc *o; /* pmc owner */
908 struct ucred *oc, *tc; /* owner, target credentials */
909 int decline_attach, i;
910
911 /*
912 * A PMC's owner can always attach that PMC to itself.
913 */
914
915 if ((o = pm->pm_owner->po_owner) == t)
916 return 0;
917
918 PROC_LOCK(o);
919 oc = o->p_ucred;
920 crhold(oc);
921 PROC_UNLOCK(o);
922
923 PROC_LOCK(t);
924 tc = t->p_ucred;
925 crhold(tc);
926 PROC_UNLOCK(t);
927
928 /*
929 * The effective uid of the PMC owner should match at least one
930 * of the {effective,real,saved} uids of the target process.
931 */
932
933 decline_attach = oc->cr_uid != tc->cr_uid &&
934 oc->cr_uid != tc->cr_svuid &&
935 oc->cr_uid != tc->cr_ruid;
936
937 /*
938 * Every one of the target's group ids must be in the owner's
939 * group list.
940 */
941 for (i = 0; !decline_attach && i < tc->cr_ngroups; i++)
942 decline_attach = !groupmember(tc->cr_groups[i], oc);
943
944	/* check the real and saved gids too */
945 if (decline_attach == 0)
946 decline_attach = !groupmember(tc->cr_rgid, oc) ||
947 !groupmember(tc->cr_svgid, oc);
948
949 crfree(tc);
950 crfree(oc);
951
952 return !decline_attach;
953}
954
955/*
956 * Attach a process to a PMC.
957 */
958
959static int
960pmc_attach_one_process(struct proc *p, struct pmc *pm)
961{
962 int ri;
963 char *fullpath, *freepath;
964 struct pmc_process *pp;
965
966 sx_assert(&pmc_sx, SX_XLOCKED);
967
968 PMCDBG(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm,
969 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
970
971 /*
972 * Locate the process descriptor corresponding to process 'p',
973 * allocating space as needed.
974 *
975 * Verify that rowindex 'pm_rowindex' is free in the process
976 * descriptor.
977 *
978 * If it is free, link the process descriptor and the
979 * PMC.
980 */
981 ri = PMC_TO_ROWINDEX(pm);
982
983 if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
984 return ENOMEM;
985
986 if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */
987 return EEXIST;
988
989 if (pp->pp_pmcs[ri].pp_pmc != NULL)
990 return EBUSY;
991
992 pmc_link_target_process(pm, pp);
993
994 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) &&
995 (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0)
996 pm->pm_flags |= PMC_F_NEEDS_LOGFILE;
997
998 pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */
999
1000 /* issue an attach event to a configured log file */
1001 if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) {
1002 pmc_getfilename(p->p_textvp, &fullpath, &freepath);
1003 if (p->p_flag & P_KTHREAD) {
1004 fullpath = kernelname;
1005 freepath = NULL;
1006 } else
1007 pmclog_process_pmcattach(pm, p->p_pid, fullpath);
1008 if (freepath)
1009 free(freepath, M_TEMP);
1010 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1011 pmc_log_process_mappings(pm->pm_owner, p);
1012 }
1013 /* mark process as using HWPMCs */
1014 PROC_LOCK(p);
1015 p->p_flag |= P_HWPMC;
1016 PROC_UNLOCK(p);
1017
1018 return 0;
1019}
1020
1021/*
1022 * Attach a process and optionally its children
1023 */
1024
1025static int
1026pmc_attach_process(struct proc *p, struct pmc *pm)
1027{
1028 int error;
1029 struct proc *top;
1030
1031 sx_assert(&pmc_sx, SX_XLOCKED);
1032
1033 PMCDBG(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm,
1034 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
1035
1036
1037 /*
1038 * If this PMC successfully allowed a GETMSR operation
1039 * in the past, disallow further ATTACHes.
1040 */
1041
1042 if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0)
1043 return EPERM;
1044
1045 if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
1046 return pmc_attach_one_process(p, pm);
1047
1048 /*
1049 * Traverse all child processes, attaching them to
1050 * this PMC.
1051 */
1052
1053 sx_slock(&proctree_lock);
1054
1055 top = p;
1056
1057 for (;;) {
1058 if ((error = pmc_attach_one_process(p, pm)) != 0)
1059 break;
1060 if (!LIST_EMPTY(&p->p_children))
1061 p = LIST_FIRST(&p->p_children);
1062 else for (;;) {
1063 if (p == top)
1064 goto done;
1065 if (LIST_NEXT(p, p_sibling)) {
1066 p = LIST_NEXT(p, p_sibling);
1067 break;
1068 }
1069 p = p->p_pptr;
1070 }
1071 }
1072
1073 if (error)
1074 (void) pmc_detach_process(top, pm);
1075
1076 done:
1077 sx_sunlock(&proctree_lock);
1078 return error;
1079}
1080
1081/*
1082 * Detach a process from a PMC. If there are no other PMCs tracking
1083 * this process, remove the process structure from its hash table. If
1084 * 'flags' contains PMC_FLAG_REMOVE, then free the process structure.
1085 */
1086
1087static int
1088pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags)
1089{
1090 int ri;
1091 struct pmc_process *pp;
1092
1093 sx_assert(&pmc_sx, SX_XLOCKED);
1094
1095 KASSERT(pm != NULL,
1096 ("[pmc,%d] null pm pointer", __LINE__));
1097
1098 ri = PMC_TO_ROWINDEX(pm);
1099
1100 PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
1101 pm, ri, p, p->p_pid, p->p_comm, flags);
1102
1103 if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
1104 return ESRCH;
1105
1106 if (pp->pp_pmcs[ri].pp_pmc != pm)
1107 return EINVAL;
1108
1109 pmc_unlink_target_process(pm, pp);
1110
1111 /* Issue a detach entry if a log file is configured */
1112 if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE)
1113 pmclog_process_pmcdetach(pm, p->p_pid);
1114
1115 /*
 1116 * If there are no PMCs targeting this process, we remove its
1117 * descriptor from the target hash table and unset the P_HWPMC
1118 * flag in the struct proc.
1119 */
1120 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc,
1121 ("[pmc,%d] Illegal refcnt %d for process struct %p",
1122 __LINE__, pp->pp_refcnt, pp));
1123
1124 if (pp->pp_refcnt != 0) /* still a target of some PMC */
1125 return 0;
1126
1127 pmc_remove_process_descriptor(pp);
1128
1129 if (flags & PMC_FLAG_REMOVE)
1130 free(pp, M_PMC);
1131
1132 PROC_LOCK(p);
1133 p->p_flag &= ~P_HWPMC;
1134 PROC_UNLOCK(p);
1135
1136 return 0;
1137}
1138
1139/*
1140 * Detach a process and optionally its descendants from a PMC.
1141 */
1142
1143static int
1144pmc_detach_process(struct proc *p, struct pmc *pm)
1145{
1146 struct proc *top;
1147
1148 sx_assert(&pmc_sx, SX_XLOCKED);
1149
1150 PMCDBG(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm,
1151 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
1152
1153 if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
1154 return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
1155
1156 /*
1157 * Traverse all children, detaching them from this PMC. We
1158 * ignore errors since we could be detaching a PMC from a
1159 * partially attached proc tree.
1160 */
1161
1162 sx_slock(&proctree_lock);
1163
1164 top = p;
1165
1166 for (;;) {
1167 (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
1168
1169 if (!LIST_EMPTY(&p->p_children))
1170 p = LIST_FIRST(&p->p_children);
1171 else for (;;) {
1172 if (p == top)
1173 goto done;
1174 if (LIST_NEXT(p, p_sibling)) {
1175 p = LIST_NEXT(p, p_sibling);
1176 break;
1177 }
1178 p = p->p_pptr;
1179 }
1180 }
1181
1182 done:
1183 sx_sunlock(&proctree_lock);
1184
1185 if (LIST_EMPTY(&pm->pm_targets))
1186 pm->pm_flags &= ~PMC_F_ATTACH_DONE;
1187
1188 return 0;
1189}
1190
1191
1192/*
1193 * Thread context switch IN
1194 */
1195
1196static void
1197pmc_process_csw_in(struct thread *td)
1198{
1199 int cpu;
1200 unsigned int adjri, ri;
1201 struct pmc *pm;
1202 struct proc *p;
1203 struct pmc_cpu *pc;
1204 struct pmc_hw *phw;
1205 pmc_value_t newvalue;
1206 struct pmc_process *pp;
1207 struct pmc_classdep *pcd;
1208
1209 p = td->td_proc;
1210
1211 if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
1212 return;
1213
1214 KASSERT(pp->pp_proc == td->td_proc,
1215 ("[pmc,%d] not my thread state", __LINE__));
1216
1217 critical_enter(); /* no preemption from this point */
1218
1219 cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
1220
1221 PMCDBG(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
1222 p->p_pid, p->p_comm, pp);
1223
1224 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1225 ("[pmc,%d] wierd CPU id %d", __LINE__, cpu));
1226
1227 pc = pmc_pcpu[cpu];
1228
1229 for (ri = 0; ri < md->pmd_npmc; ri++) {
1230
1231 if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
1232 continue;
1233
1234 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
1235 ("[pmc,%d] Target PMC in non-virtual mode (%d)",
1236 __LINE__, PMC_TO_MODE(pm)));
1237
1238 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
1239 ("[pmc,%d] Row index mismatch pmc %d != ri %d",
1240 __LINE__, PMC_TO_ROWINDEX(pm), ri));
1241
1242 /*
1243 * Only PMCs that are marked as 'RUNNING' need
1244 * be placed on hardware.
1245 */
1246
1247 if (pm->pm_state != PMC_STATE_RUNNING)
1248 continue;
1249
1250 /* increment PMC runcount */
1251 atomic_add_rel_int(&pm->pm_runcount, 1);
1252
1253 /* configure the HWPMC we are going to use. */
1254 pcd = pmc_ri_to_classdep(md, ri, &adjri);
1255 pcd->pcd_config_pmc(cpu, adjri, pm);
1256
1257 phw = pc->pc_hwpmcs[ri];
1258
1259 KASSERT(phw != NULL,
1260 ("[pmc,%d] null hw pointer", __LINE__));
1261
1262 KASSERT(phw->phw_pmc == pm,
1263 ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__,
1264 phw->phw_pmc, pm));
1265
1266 /*
1267 * Write out saved value and start the PMC.
1268 *
1269 * Sampling PMCs use a per-process value, while
1270 * counting mode PMCs use a per-pmc value that is
1271 * inherited across descendants.
1272 */
1273 if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
1274 mtx_pool_lock_spin(pmc_mtxpool, pm);
1275 newvalue = PMC_PCPU_SAVED(cpu,ri) =
1276 pp->pp_pmcs[ri].pp_pmcval;
1277 mtx_pool_unlock_spin(pmc_mtxpool, pm);
1278 } else {
1279 KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
1280 ("[pmc,%d] illegal mode=%d", __LINE__,
1281 PMC_TO_MODE(pm)));
1282 mtx_pool_lock_spin(pmc_mtxpool, pm);
1283 newvalue = PMC_PCPU_SAVED(cpu, ri) =
1284 pm->pm_gv.pm_savedvalue;
1285 mtx_pool_unlock_spin(pmc_mtxpool, pm);
1286 }
1287
1288 PMCDBG(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue);
1289
1290 pcd->pcd_write_pmc(cpu, adjri, newvalue);
1291 pcd->pcd_start_pmc(cpu, adjri);
1292 }
1293
1294 /*
1295 * perform any other architecture/cpu dependent thread
1296 * switch-in actions.
1297 */
1298
1299 (void) (*md->pmd_switch_in)(pc, pp);
1300
1301 critical_exit();
1302
1303}
1304
1305/*
1306 * Thread context switch OUT.
1307 */
1308
1309static void
1310pmc_process_csw_out(struct thread *td)
1311{
1312 int cpu;
1313 int64_t tmp;
1314 struct pmc *pm;
1315 struct proc *p;
1316 enum pmc_mode mode;
1317 struct pmc_cpu *pc;
1318 pmc_value_t newvalue;
1319 unsigned int adjri, ri;
1320 struct pmc_process *pp;
1321 struct pmc_classdep *pcd;
1322
1323
1324 /*
1325 * Locate our process descriptor; this may be NULL if
1326 * this process is exiting and we have already removed
1327 * the process from the target process table.
1328 *
1329 * Note that due to kernel preemption, multiple
1330 * context switches may happen while the process is
1331 * exiting.
1332 *
1333 * Note also that if the target process cannot be
1334 * found we still need to deconfigure any PMCs that
1335 * are currently running on hardware.
1336 */
1337
1338 p = td->td_proc;
1339 pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE);
1340
1341 /*
1342 * save PMCs
1343 */
1344
1345 critical_enter();
1346
1347 cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
1348
1349 PMCDBG(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
1350 p->p_pid, p->p_comm, pp);
1351
1352 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1353 ("[pmc,%d wierd CPU id %d", __LINE__, cpu));
1354
1355 pc = pmc_pcpu[cpu];
1356
1357 /*
 1358 * When a PMC gets unlinked from a target process, it will
 1359 * be removed from the target's pp_pmcs[] array.
1360 *
1361 * However, on a MP system, the target could have been
1362 * executing on another CPU at the time of the unlink.
1363 * So, at context switch OUT time, we need to look at
1364 * the hardware to determine if a PMC is scheduled on
1365 * it.
1366 */
1367
1368 for (ri = 0; ri < md->pmd_npmc; ri++) {
1369
1370 pcd = pmc_ri_to_classdep(md, ri, &adjri);
1371 pm = NULL;
1372 (void) (*pcd->pcd_get_config)(cpu, adjri, &pm);
1373
1374 if (pm == NULL) /* nothing at this row index */
1375 continue;
1376
1377 mode = PMC_TO_MODE(pm);
1378 if (!PMC_IS_VIRTUAL_MODE(mode))
1379 continue; /* not a process virtual PMC */
1380
1381 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
1382 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
1383 __LINE__, PMC_TO_ROWINDEX(pm), ri));
1384
1385 /* Stop hardware if not already stopped */
1386 if (pm->pm_stalled == 0)
1387 pcd->pcd_stop_pmc(cpu, adjri);
1388
1389 /* reduce this PMC's runcount */
1390 atomic_subtract_rel_int(&pm->pm_runcount, 1);
1391
1392 /*
1393 * If this PMC is associated with this process,
1394 * save the reading.
1395 */
1396
1397 if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) {
1398
1399 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
1400 ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__,
1401 pm, ri, pp->pp_pmcs[ri].pp_pmc));
1402
1403 KASSERT(pp->pp_refcnt > 0,
1404 ("[pmc,%d] pp refcnt = %d", __LINE__,
1405 pp->pp_refcnt));
1406
1407 pcd->pcd_read_pmc(cpu, adjri, &newvalue);
1408
1409 tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
1410
1411 PMCDBG(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd", cpu, ri,
1412 tmp);
1413
1414 if (mode == PMC_MODE_TS) {
1415
1416 /*
1417 * For sampling process-virtual PMCs,
1418 * we expect the count to be
1419 * decreasing as the 'value'
1420 * programmed into the PMC is the
1421 * number of events to be seen till
1422 * the next sampling interrupt.
1423 */
1424 if (tmp < 0)
1425 tmp += pm->pm_sc.pm_reloadcount;
1426 mtx_pool_lock_spin(pmc_mtxpool, pm);
1427 pp->pp_pmcs[ri].pp_pmcval -= tmp;
1428 if ((int64_t) pp->pp_pmcs[ri].pp_pmcval < 0)
1429 pp->pp_pmcs[ri].pp_pmcval +=
1430 pm->pm_sc.pm_reloadcount;
1431 mtx_pool_unlock_spin(pmc_mtxpool, pm);
1432
1433 } else {
1434
1435 /*
1436 * For counting process-virtual PMCs,
1437 * we expect the count to be
1438 * increasing monotonically, modulo a 64
1439 * bit wraparound.
1440 */
1441 KASSERT((int64_t) tmp >= 0,
1442 ("[pmc,%d] negative increment cpu=%d "
1443 "ri=%d newvalue=%jx saved=%jx "
1444 "incr=%jx", __LINE__, cpu, ri,
1445 newvalue, PMC_PCPU_SAVED(cpu,ri), tmp));
1446
1447 mtx_pool_lock_spin(pmc_mtxpool, pm);
1448 pm->pm_gv.pm_savedvalue += tmp;
1449 pp->pp_pmcs[ri].pp_pmcval += tmp;
1450 mtx_pool_unlock_spin(pmc_mtxpool, pm);
1451
1452 if (pm->pm_flags & PMC_F_LOG_PROCCSW)
1453 pmclog_process_proccsw(pm, pp, tmp);
1454 }
1455 }
1456
1457 /* mark hardware as free */
1458 pcd->pcd_config_pmc(cpu, adjri, NULL);
1459 }
1460
1461 /*
1462 * perform any other architecture/cpu dependent thread
1463 * switch out functions.
1464 */
1465
1466 (void) (*md->pmd_switch_out)(pc, pp);
1467
1468 critical_exit();
1469}
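
/*
 * Worked example for the arithmetic above, with made-up numbers: for a
 * counting (TC) PMC whose saved per-cpu value at switch-in was 1000 and
 * whose hardware now reads 1750, tmp = 750 events are credited to both
 * pm_gv.pm_savedvalue (the per-PMC total) and pp_pmcs[ri].pp_pmcval
 * (this process' share).  For a sampling (TS) PMC the counter runs
 * toward the next interrupt, so a negative difference is first folded
 * back into the [0, pm_reloadcount) range and then subtracted from the
 * per-process value, which itself wraps modulo the reload count.
 */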
1470
1471/*
1472 * Log a KLD operation.
1473 */
1474
1475static void
1476pmc_process_kld_load(struct pmckern_map_in *pkm)
1477{
1478 struct pmc_owner *po;
1479
1480 sx_assert(&pmc_sx, SX_LOCKED);
1481
1482 /*
1483 * Notify owners of system sampling PMCs about KLD operations.
1484 */
1485
1486 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1487 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1488 pmclog_process_map_in(po, (pid_t) -1, pkm->pm_address,
1489 (char *) pkm->pm_file);
1490
1491 /*
1492 * TODO: Notify owners of (all) process-sampling PMCs too.
1493 */
1494
1495 return;
1496}
1497
1498static void
1499pmc_process_kld_unload(struct pmckern_map_out *pkm)
1500{
1501 struct pmc_owner *po;
1502
1503 sx_assert(&pmc_sx, SX_LOCKED);
1504
1505 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1506 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1507 pmclog_process_map_out(po, (pid_t) -1,
1508 pkm->pm_address, pkm->pm_address + pkm->pm_size);
1509
1510 /*
1511 * TODO: Notify owners of process-sampling PMCs.
1512 */
1513}
1514
1515/*
1516 * A mapping change for a process.
1517 */
1518
1519static void
1520pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm)
1521{
1522 int ri;
1523 pid_t pid;
1524 char *fullpath, *freepath;
1525 const struct pmc *pm;
1526 struct pmc_owner *po;
1527 const struct pmc_process *pp;
1528
1529 freepath = fullpath = NULL;
1530 pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
1531
1532 pid = td->td_proc->p_pid;
1533
1534 /* Inform owners of all system-wide sampling PMCs. */
1535 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1536 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1537 pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
1538
1539 if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
1540 goto done;
1541
1542 /*
1543 * Inform sampling PMC owners tracking this process.
1544 */
1545 for (ri = 0; ri < md->pmd_npmc; ri++)
1546 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
1547 PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1548 pmclog_process_map_in(pm->pm_owner,
1549 pid, pkm->pm_address, fullpath);
1550
1551 done:
1552 if (freepath)
1553 free(freepath, M_TEMP);
1554}
1555
1556
1557/*
1558 * Log an munmap request.
1559 */
1560
1561static void
1562pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm)
1563{
1564 int ri;
1565 pid_t pid;
1566 struct pmc_owner *po;
1567 const struct pmc *pm;
1568 const struct pmc_process *pp;
1569
1570 pid = td->td_proc->p_pid;
1571
1572 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1573 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1574 pmclog_process_map_out(po, pid, pkm->pm_address,
1575 pkm->pm_address + pkm->pm_size);
1576
1577 if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
1578 return;
1579
1580 for (ri = 0; ri < md->pmd_npmc; ri++)
1581 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
1582 PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1583 pmclog_process_map_out(pm->pm_owner, pid,
1584 pkm->pm_address, pkm->pm_address + pkm->pm_size);
1585}
1586
1587/*
1588 * Log mapping information about the kernel.
1589 */
1590
1591static void
1592pmc_log_kernel_mappings(struct pmc *pm)
1593{
1594 struct pmc_owner *po;
1595 struct pmckern_map_in *km, *kmbase;
1596
1597 sx_assert(&pmc_sx, SX_LOCKED);
1598 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
1599 ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
1600 __LINE__, (void *) pm));
1601
1602 po = pm->pm_owner;
1603
1604 if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE)
1605 return;
1606
1607 /*
1608 * Log the current set of kernel modules.
1609 */
1610 kmbase = linker_hwpmc_list_objects();
1611 for (km = kmbase; km->pm_file != NULL; km++) {
1612 PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
1613 (void *) km->pm_address);
1614 pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
1615 km->pm_file);
1616 }
1617 free(kmbase, M_LINKER);
1618
1619 po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE;
1620}
1621
1622/*
1623 * Log the mappings for a single process.
1624 */
1625
1626static void
1627pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
1628{
1629 int locked;
1630 vm_map_t map;
1631 struct vnode *vp;
1632 struct vmspace *vm;
1633 vm_map_entry_t entry;
1634 vm_offset_t last_end;
1635 u_int last_timestamp;
1636 struct vnode *last_vp;
1637 vm_offset_t start_addr;
1638 vm_object_t obj, lobj, tobj;
1639 char *fullpath, *freepath;
1640
1641 last_vp = NULL;
1642 last_end = (vm_offset_t) 0;
1643 fullpath = freepath = NULL;
1644
1645 if ((vm = vmspace_acquire_ref(p)) == NULL)
1646 return;
1647
1648 map = &vm->vm_map;
1649 vm_map_lock_read(map);
1650
1651 for (entry = map->header.next; entry != &map->header; entry = entry->next) {
1652
1653 if (entry == NULL) {
1654 PMCDBG(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly "
1655 "NULL! pid=%d vm_map=%p\n", p->p_pid, map);
1656 break;
1657 }
1658
1659 /*
1660 * We only care about executable map entries.
1661 */
1662 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
1663 !(entry->protection & VM_PROT_EXECUTE) ||
1664 (entry->object.vm_object == NULL)) {
1665 continue;
1666 }
1667
1668 obj = entry->object.vm_object;
1669 VM_OBJECT_LOCK(obj);
1670
1671 /*
1672 * Walk the backing_object list to find the base
1673 * (non-shadowed) vm_object.
1674 */
1675 for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) {
1676 if (tobj != obj)
1677 VM_OBJECT_LOCK(tobj);
1678 if (lobj != obj)
1679 VM_OBJECT_UNLOCK(lobj);
1680 lobj = tobj;
1681 }
1682
1683 /*
1684 * At this point lobj is the base vm_object and it is locked.
1685 */
1686 if (lobj == NULL) {
1687 PMCDBG(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! pid=%d "
1688 "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj);
1689 VM_OBJECT_UNLOCK(obj);
1690 continue;
1691 }
1692
1693 if (lobj->type != OBJT_VNODE || lobj->handle == NULL) {
1694 if (lobj != obj)
1695 VM_OBJECT_UNLOCK(lobj);
1696 VM_OBJECT_UNLOCK(obj);
1697 continue;
1698 }
1699
1700 /*
1701 * Skip contiguous regions that point to the same
1702 * vnode, so we don't emit redundant MAP-IN
1703 * directives.
1704 */
1705 if (entry->start == last_end && lobj->handle == last_vp) {
1706 last_end = entry->end;
1707 if (lobj != obj)
1708 VM_OBJECT_UNLOCK(lobj);
1709 VM_OBJECT_UNLOCK(obj);
1710 continue;
1711 }
1712
1713 /*
1714 * We don't want to keep the proc's vm_map or this
1715 * vm_object locked while we walk the pathname, since
1716 * vn_fullpath() can sleep. However, if we drop the
1717 * lock, it's possible for concurrent activity to
1718 * modify the vm_map list. To protect against this,
1719 * we save the vm_map timestamp before we release the
1720 * lock, and check it after we reacquire the lock
1721 * below.
1722 */
1723 start_addr = entry->start;
1724 last_end = entry->end;
1725 last_timestamp = map->timestamp;
1726 vm_map_unlock_read(map);
1727
1728 vp = lobj->handle;
1729 vref(vp);
1730 if (lobj != obj)
1731 VM_OBJECT_UNLOCK(lobj);
1732
1733 VM_OBJECT_UNLOCK(obj);
1734
1735 freepath = NULL;
1736 pmc_getfilename(vp, &fullpath, &freepath);
1737 last_vp = vp;
1738
1739 locked = VFS_LOCK_GIANT(vp->v_mount);
1740 vrele(vp);
1741 VFS_UNLOCK_GIANT(locked);
1742
1743 vp = NULL;
1744 pmclog_process_map_in(po, p->p_pid, start_addr, fullpath);
1745 if (freepath)
1746 free(freepath, M_TEMP);
1747
1748 vm_map_lock_read(map);
1749
1750 /*
1751 * If our saved timestamp doesn't match, this means
1752 * that the vm_map was modified out from under us and
1753 * we can't trust our current "entry" pointer. Do a
1754 * new lookup for this entry. If there is no entry
1755 * for this address range, vm_map_lookup_entry() will
1756 * return the previous one, so we always want to go to
1757 * entry->next on the next loop iteration.
1758 *
1759 * There is an edge condition here that can occur if
1760 * there is no entry at or before this address. In
1761 * this situation, vm_map_lookup_entry returns
1762 * &map->header, which would cause our loop to abort
1763 * without processing the rest of the map. However,
1764 * in practice this will never happen for process
1765 * vm_map. This is because the executable's text
1766 * segment is the first mapping in the proc's address
1767 * space, and this mapping is never removed until the
1768 * process exits, so there will always be a non-header
1769 * entry at or before the requested address for
1770 * vm_map_lookup_entry to return.
1771 */
1772 if (map->timestamp != last_timestamp)
1773 vm_map_lookup_entry(map, last_end - 1, &entry);
1774 }
1775
1776 vm_map_unlock_read(map);
1777 vmspace_free(vm);
1778 return;
1779}
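
/*
 * Locking summary for the walk above: the vm_map read lock is held
 * across the scan; each entry's shadow chain is locked hand-over-hand
 * until the bottom (non-shadowed) vm_object is found; and both the map
 * and object locks are dropped around pmc_getfilename(), since
 * vn_fullpath() may sleep.  The saved map->timestamp is what detects a
 * concurrent map change so the entry pointer can be looked up again.
 */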
1780
1781/*
1782 * Log mappings for all processes in the system.
1783 */
1784
1785static void
1786pmc_log_all_process_mappings(struct pmc_owner *po)
1787{
1788 struct proc *p, *top;
1789
1790 sx_assert(&pmc_sx, SX_XLOCKED);
1791
1792 if ((p = pfind(1)) == NULL)
1793 panic("[pmc,%d] Cannot find init", __LINE__);
1794
1795 PROC_UNLOCK(p);
1796
1797 sx_slock(&proctree_lock);
1798
1799 top = p;
1800
1801 for (;;) {
1802 pmc_log_process_mappings(po, p);
1803 if (!LIST_EMPTY(&p->p_children))
1804 p = LIST_FIRST(&p->p_children);
1805 else for (;;) {
1806 if (p == top)
1807 goto done;
1808 if (LIST_NEXT(p, p_sibling)) {
1809 p = LIST_NEXT(p, p_sibling);
1810 break;
1811 }
1812 p = p->p_pptr;
1813 }
1814 }
1815 done:
1816 sx_sunlock(&proctree_lock);
1817}
1818
1819/*
1820 * The 'hook' invoked from the kernel proper
1821 */
1822
1823
1824#ifdef DEBUG
1825const char *pmc_hooknames[] = {
1826 /* these strings correspond to PMC_FN_* in <sys/pmckern.h> */
1827 "",
1828 "EXEC",
1829 "CSW-IN",
1830 "CSW-OUT",
1831 "SAMPLE",
1832 "KLDLOAD",
1833 "KLDUNLOAD",
1834 "MMAP",
1835 "MUNMAP",
1836 "CALLCHAIN"
1837};
1838#endif
1839
1840static int
1841pmc_hook_handler(struct thread *td, int function, void *arg)
1842{
1843
1844 PMCDBG(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
1845 pmc_hooknames[function], arg);
1846
1847 switch (function)
1848 {
1849
1850 /*
1851 * Process exec()
1852 */
1853
1854 case PMC_FN_PROCESS_EXEC:
1855 {
1856 char *fullpath, *freepath;
1857 unsigned int ri;
1858 int is_using_hwpmcs;
1859 struct pmc *pm;
1860 struct proc *p;
1861 struct pmc_owner *po;
1862 struct pmc_process *pp;
1863 struct pmckern_procexec *pk;
1864
1865 sx_assert(&pmc_sx, SX_XLOCKED);
1866
1867 p = td->td_proc;
1868 pmc_getfilename(p->p_textvp, &fullpath, &freepath);
1869
1870 pk = (struct pmckern_procexec *) arg;
1871
1872 /* Inform owners of SS mode PMCs of the exec event. */
1873 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
1874 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
1875 pmclog_process_procexec(po, PMC_ID_INVALID,
1876 p->p_pid, pk->pm_entryaddr, fullpath);
1877
1878 PROC_LOCK(p);
1879 is_using_hwpmcs = p->p_flag & P_HWPMC;
1880 PROC_UNLOCK(p);
1881
1882 if (!is_using_hwpmcs) {
1883 if (freepath)
1884 free(freepath, M_TEMP);
1885 break;
1886 }
1887
1888 /*
1889 * PMCs are not inherited across an exec(): remove any
1890 * PMCs that this process is the owner of.
1891 */
1892
1893 if ((po = pmc_find_owner_descriptor(p)) != NULL) {
1894 pmc_remove_owner(po);
1895 pmc_destroy_owner_descriptor(po);
1896 }
1897
1898 /*
1899 * If the process being exec'ed is not the target of any
1900 * PMC, we are done.
1901 */
1902 if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) {
1903 if (freepath)
1904 free(freepath, M_TEMP);
1905 break;
1906 }
1907
1908 /*
1909 * Log the exec event to all monitoring owners. Skip
 1910 * owners who have already received the event because
1911 * they had system sampling PMCs active.
1912 */
1913 for (ri = 0; ri < md->pmd_npmc; ri++)
1914 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
1915 po = pm->pm_owner;
1916 if (po->po_sscount == 0 &&
1917 po->po_flags & PMC_PO_OWNS_LOGFILE)
1918 pmclog_process_procexec(po, pm->pm_id,
1919 p->p_pid, pk->pm_entryaddr,
1920 fullpath);
1921 }
1922
1923 if (freepath)
1924 free(freepath, M_TEMP);
1925
1926
1927 PMCDBG(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d",
1928 p, p->p_pid, p->p_comm, pk->pm_credentialschanged);
1929
1930 if (pk->pm_credentialschanged == 0) /* no change */
1931 break;
1932
1933 /*
1934 * If the newly exec()'ed process has a different credential
1935 * than before, allow it to be the target of a PMC only if
 1936 * the PMC's owner has sufficient privilege.
1937 */
1938
1939 for (ri = 0; ri < md->pmd_npmc; ri++)
1940 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL)
1941 if (pmc_can_attach(pm, td->td_proc) != 0)
1942 pmc_detach_one_process(td->td_proc,
1943 pm, PMC_FLAG_NONE);
1944
1945 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc,
1946 ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__,
1947 pp->pp_refcnt, pp));
1948
1949 /*
1950 * If this process is no longer the target of any
1951 * PMCs, we can remove the process entry and free
1952 * up space.
1953 */
1954
1955 if (pp->pp_refcnt == 0) {
1956 pmc_remove_process_descriptor(pp);
1957 free(pp, M_PMC);
1958 break;
1959 }
1960
1961 }
1962 break;
1963
1964 case PMC_FN_CSW_IN:
1965 pmc_process_csw_in(td);
1966 break;
1967
1968 case PMC_FN_CSW_OUT:
1969 pmc_process_csw_out(td);
1970 break;
1971
1972 /*
1973 * Process accumulated PC samples.
1974 *
1975 * This function is expected to be called by hardclock() for
1976 * each CPU that has accumulated PC samples.
1977 *
1978 * This function is to be executed on the CPU whose samples
1979 * are being processed.
1980 */
1981 case PMC_FN_DO_SAMPLES:
1982
1983 /*
1984 * Clear the cpu specific bit in the CPU mask before
 1985 * doing the rest of the processing. If the NMI handler
1986 * gets invoked after the "atomic_clear_int()" call
1987 * below but before "pmc_process_samples()" gets
1988 * around to processing the interrupt, then we will
1989 * come back here at the next hardclock() tick (and
1990 * may find nothing to do if "pmc_process_samples()"
1991 * had already processed the interrupt). We don't
1992 * lose the interrupt sample.
1993 */
1994 CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask);
1995 pmc_process_samples(PCPU_GET(cpuid));
1996 break;
1997
1998
1999 case PMC_FN_KLD_LOAD:
2000 sx_assert(&pmc_sx, SX_LOCKED);
2001 pmc_process_kld_load((struct pmckern_map_in *) arg);
2002 break;
2003
2004 case PMC_FN_KLD_UNLOAD:
2005 sx_assert(&pmc_sx, SX_LOCKED);
2006 pmc_process_kld_unload((struct pmckern_map_out *) arg);
2007 break;
2008
2009 case PMC_FN_MMAP:
2010 sx_assert(&pmc_sx, SX_LOCKED);
2011 pmc_process_mmap(td, (struct pmckern_map_in *) arg);
2012 break;
2013
2014 case PMC_FN_MUNMAP:
2015 sx_assert(&pmc_sx, SX_LOCKED);
2016 pmc_process_munmap(td, (struct pmckern_map_out *) arg);
2017 break;
2018
2019 case PMC_FN_USER_CALLCHAIN:
2020 /*
2021 * Record a call chain.
2022 */
2023 KASSERT(td == curthread, ("[pmc,%d] td != curthread",
2024 __LINE__));
2025 pmc_capture_user_callchain(PCPU_GET(cpuid),
2026 (struct trapframe *) arg);
2027 td->td_pflags &= ~TDP_CALLCHAIN;
2028 break;
2029
2030 default:
2031#ifdef DEBUG
2032 KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
2033#endif
2034 break;
2035
2036 }
2037
2038 return 0;
2039}
2040
2041/*
2042 * allocate a 'struct pmc_owner' descriptor in the owner hash table.
2043 */
2044
2045static struct pmc_owner *
2046pmc_allocate_owner_descriptor(struct proc *p)
2047{
2048 uint32_t hindex;
2049 struct pmc_owner *po;
2050 struct pmc_ownerhash *poh;
2051
2052 hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
2053 poh = &pmc_ownerhash[hindex];
2054
 2055 /* allocate space for the owner descriptor */
2056 po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO);
2057 po->po_sscount = po->po_error = po->po_flags = po->po_logprocmaps = 0;
2058 po->po_file = NULL;
2059 po->po_owner = p;
2060 po->po_kthread = NULL;
2061 LIST_INIT(&po->po_pmcs);
2062 LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */
2063
2064 TAILQ_INIT(&po->po_logbuffers);
2065 mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN);
2066
2067 PMCDBG(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p",
2068 p, p->p_pid, p->p_comm, po);
2069
2070 return po;
2071}
2072
2073static void
2074pmc_destroy_owner_descriptor(struct pmc_owner *po)
2075{
2076
2077 PMCDBG(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)",
2078 po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm);
2079
2080 mtx_destroy(&po->po_mtx);
2081 free(po, M_PMC);
2082}
2083
2084/*
2085 * find the descriptor corresponding to process 'p', adding or removing it
2086 * as specified by 'mode'.
2087 */
2088
2089static struct pmc_process *
2090pmc_find_process_descriptor(struct proc *p, uint32_t mode)
2091{
2092 uint32_t hindex;
2093 struct pmc_process *pp, *ppnew;
2094 struct pmc_processhash *pph;
2095
2096 hindex = PMC_HASH_PTR(p, pmc_processhashmask);
2097 pph = &pmc_processhash[hindex];
2098
2099 ppnew = NULL;
2100
2101 /*
2102 * Pre-allocate memory in the FIND_ALLOCATE case since we
2103 * cannot call malloc(9) once we hold a spin lock.
2104 */
2105 if (mode & PMC_FLAG_ALLOCATE)
2106 ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc *
2107 sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO);
2108
2109 mtx_lock_spin(&pmc_processhash_mtx);
2110 LIST_FOREACH(pp, pph, pp_next)
2111 if (pp->pp_proc == p)
2112 break;
2113
2114 if ((mode & PMC_FLAG_REMOVE) && pp != NULL)
2115 LIST_REMOVE(pp, pp_next);
2116
2117 if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL &&
2118 ppnew != NULL) {
2119 ppnew->pp_proc = p;
2120 LIST_INSERT_HEAD(pph, ppnew, pp_next);
2121 pp = ppnew;
2122 ppnew = NULL;
2123 }
2124 mtx_unlock_spin(&pmc_processhash_mtx);
2125
2126 if (pp != NULL && ppnew != NULL)
2127 free(ppnew, M_PMC);
2128
2129 return pp;
2130}
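
/*
 * Note on the allocation pattern above: the descriptor is allocated
 * speculatively before the spin lock is taken (malloc(9) may sleep),
 * and if a matching descriptor was already present, or no allocation
 * was requested, the unused block is freed after the lock is dropped.
 */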
2131
2132/*
2133 * remove a process descriptor from the process hash table.
2134 */
2135
2136static void
2137pmc_remove_process_descriptor(struct pmc_process *pp)
2138{
2139 KASSERT(pp->pp_refcnt == 0,
2140 ("[pmc,%d] Removing process descriptor %p with count %d",
2141 __LINE__, pp, pp->pp_refcnt));
2142
2143 mtx_lock_spin(&pmc_processhash_mtx);
2144 LIST_REMOVE(pp, pp_next);
2145 mtx_unlock_spin(&pmc_processhash_mtx);
2146}
2147
2148
2149/*
2150 * find an owner descriptor corresponding to proc 'p'
2151 */
2152
2153static struct pmc_owner *
2154pmc_find_owner_descriptor(struct proc *p)
2155{
2156 uint32_t hindex;
2157 struct pmc_owner *po;
2158 struct pmc_ownerhash *poh;
2159
2160 hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
2161 poh = &pmc_ownerhash[hindex];
2162
2163 po = NULL;
2164 LIST_FOREACH(po, poh, po_next)
2165 if (po->po_owner == p)
2166 break;
2167
2168 PMCDBG(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> "
2169 "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po);
2170
2171 return po;
2172}
2173
2174/*
2175 * pmc_allocate_pmc_descriptor
2176 *
2177 * Allocate a pmc descriptor and initialize its
2178 * fields.
2179 */
2180
2181static struct pmc *
2182pmc_allocate_pmc_descriptor(void)
2183{
2184 struct pmc *pmc;
2185
2186 pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO);
2187
2188 if (pmc != NULL) {
2189 pmc->pm_owner = NULL;
2190 LIST_INIT(&pmc->pm_targets);
2191 }
2192
2193 PMCDBG(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc);
2194
2195 return pmc;
2196}
2197
2198/*
2199 * Destroy a pmc descriptor.
2200 */
2201
2202static void
2203pmc_destroy_pmc_descriptor(struct pmc *pm)
2204{
2205 (void) pm;
2206
2207#ifdef DEBUG
2208 KASSERT(pm->pm_state == PMC_STATE_DELETED ||
2209 pm->pm_state == PMC_STATE_FREE,
2210 ("[pmc,%d] destroying non-deleted PMC", __LINE__));
2211 KASSERT(LIST_EMPTY(&pm->pm_targets),
2212 ("[pmc,%d] destroying pmc with targets", __LINE__));
2213 KASSERT(pm->pm_owner == NULL,
2214 ("[pmc,%d] destroying pmc attached to an owner", __LINE__));
2215 KASSERT(pm->pm_runcount == 0,
2216 ("[pmc,%d] pmc has non-zero run count %d", __LINE__,
2217 pm->pm_runcount));
2218#endif
2219}
2220
2221static void
2222pmc_wait_for_pmc_idle(struct pmc *pm)
2223{
2224#ifdef DEBUG
2225 volatile int maxloop;
2226
2227 maxloop = 100 * pmc_cpu_max();
2228#endif
2229
2230 /*
2231 * Loop (with a forced context switch) till the PMC's runcount
2232 * comes down to zero.
2233 */
2234 while (atomic_load_acq_32(&pm->pm_runcount) > 0) {
2235#ifdef DEBUG
2236 maxloop--;
2237 KASSERT(maxloop > 0,
2238 ("[pmc,%d] (ri%d, rc%d) waiting too long for "
2239 "pmc to be free", __LINE__,
2240 PMC_TO_ROWINDEX(pm), pm->pm_runcount));
2241#endif
2242 pmc_force_context_switch();
2243 }
2244}
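
/*
 * pm_runcount is incremented in pmc_process_csw_in() whenever the PMC
 * is placed on hardware and decremented in pmc_process_csw_out(), so
 * forcing context switches is sufficient to drain it.  The DEBUG-only
 * bound of 100 * pmc_cpu_max() iterations is a sanity check against a
 * counter that never reaches zero.
 */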
2245
2246/*
2247 * This function does the following things:
2248 *
2249 * - detaches the PMC from hardware
2250 * - unlinks all target threads that were attached to it
2251 * - removes the PMC from its owner's list
 2252 * - destroys the PMC's private mutex
2253 *
2254 * Once this function completes, the given pmc pointer can be safely
2255 * FREE'd by the caller.
2256 */
2257
2258static void
2259pmc_release_pmc_descriptor(struct pmc *pm)
2260{
2261 enum pmc_mode mode;
2262 struct pmc_hw *phw;
2263 u_int adjri, ri, cpu;
2264 struct pmc_owner *po;
2265 struct pmc_binding pb;
2266 struct pmc_process *pp;
2267 struct pmc_classdep *pcd;
2268 struct pmc_target *ptgt, *tmp;
2269
2270 sx_assert(&pmc_sx, SX_XLOCKED);
2271
2272 KASSERT(pm, ("[pmc,%d] null pmc", __LINE__));
2273
2274 ri = PMC_TO_ROWINDEX(pm);
2275 pcd = pmc_ri_to_classdep(md, ri, &adjri);
2276 mode = PMC_TO_MODE(pm);
2277
2278 PMCDBG(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri,
2279 mode);
2280
2281 /*
2282 * First, we take the PMC off hardware.
2283 */
2284 cpu = 0;
2285 if (PMC_IS_SYSTEM_MODE(mode)) {
2286
2287 /*
2288 * A system mode PMC runs on a specific CPU. Switch
2289 * to this CPU and turn hardware off.
2290 */
2291 pmc_save_cpu_binding(&pb);
2292
2293 cpu = PMC_TO_CPU(pm);
2294
2295 pmc_select_cpu(cpu);
2296
2297 /* switch off non-stalled CPUs */
2298 if (pm->pm_state == PMC_STATE_RUNNING &&
2299 pm->pm_stalled == 0) {
2300
2301 phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
2302
2303 KASSERT(phw->phw_pmc == pm,
2304 ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)",
2305 __LINE__, ri, phw->phw_pmc, pm));
2306 PMCDBG(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri);
2307
2308 critical_enter();
2309 pcd->pcd_stop_pmc(cpu, adjri);
2310 critical_exit();
2311 }
2312
2313 PMCDBG(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri);
2314
2315 critical_enter();
2316 pcd->pcd_config_pmc(cpu, adjri, NULL);
2317 critical_exit();
2318
2319 /* adjust the global and process count of SS mode PMCs */
2320 if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) {
2321 po = pm->pm_owner;
2322 po->po_sscount--;
2323 if (po->po_sscount == 0) {
2324 atomic_subtract_rel_int(&pmc_ss_count, 1);
2325 LIST_REMOVE(po, po_ssnext);
2326 }
2327 }
2328
2329 pm->pm_state = PMC_STATE_DELETED;
2330
2331 pmc_restore_cpu_binding(&pb);
2332
2333 /*
2334 * We could have references to this PMC structure in
2335 * the per-cpu sample queues. Wait for the queue to
2336 * drain.
2337 */
2338 pmc_wait_for_pmc_idle(pm);
2339
2340 } else if (PMC_IS_VIRTUAL_MODE(mode)) {
2341
2342 /*
2343 * A virtual PMC could be running on multiple CPUs at
2344 * a given instant.
2345 *
2346 * By marking its state as DELETED, we ensure that
2347 * this PMC is never further scheduled on hardware.
2348 *
2349 * Then we wait till all CPUs are done with this PMC.
2350 */
2351 pm->pm_state = PMC_STATE_DELETED;
2352
2353
 2354 /* Wait for the PMC's runcount to come to zero. */
2355 pmc_wait_for_pmc_idle(pm);
2356
2357 /*
2358 * At this point the PMC is off all CPUs and cannot be
2359 * freshly scheduled onto a CPU. It is now safe to
2360 * unlink all targets from this PMC. If a
2361 * process-record's refcount falls to zero, we remove
2362 * it from the hash table. The module-wide SX lock
2363 * protects us from races.
2364 */
2365 LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) {
2366 pp = ptgt->pt_process;
2367 pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */
2368
2369 PMCDBG(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt);
2370
2371 /*
2372 * If the target process record shows that no
2373 * PMCs are attached to it, reclaim its space.
2374 */
2375
2376 if (pp->pp_refcnt == 0) {
2377 pmc_remove_process_descriptor(pp);
2378 free(pp, M_PMC);
2379 }
2380 }
2381
2382 cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */
2383
2384 }
2385
2386 /*
2387 * Release any MD resources
2388 */
2389 (void) pcd->pcd_release_pmc(cpu, adjri, pm);
2390
2391 /*
2392 * Update row disposition
2393 */
2394
2395 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm)))
2396 PMC_UNMARK_ROW_STANDALONE(ri);
2397 else
2398 PMC_UNMARK_ROW_THREAD(ri);
2399
2400 /* unlink from the owner's list */
2401 if (pm->pm_owner) {
2402 LIST_REMOVE(pm, pm_next);
2403 pm->pm_owner = NULL;
2404 }
2405
2406 pmc_destroy_pmc_descriptor(pm);
2407}
2408
2409/*
2410 * Register an owner and a pmc.
2411 */
2412
2413static int
2414pmc_register_owner(struct proc *p, struct pmc *pmc)
2415{
2416 struct pmc_owner *po;
2417
2418 sx_assert(&pmc_sx, SX_XLOCKED);
2419
2420 if ((po = pmc_find_owner_descriptor(p)) == NULL)
2421 if ((po = pmc_allocate_owner_descriptor(p)) == NULL)
2422 return ENOMEM;
2423
2424 KASSERT(pmc->pm_owner == NULL,
2425 ("[pmc,%d] attempting to own an initialized PMC", __LINE__));
2426 pmc->pm_owner = po;
2427
2428 LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next);
2429
2430 PROC_LOCK(p);
2431 p->p_flag |= P_HWPMC;
2432 PROC_UNLOCK(p);
2433
2434 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
2435 pmclog_process_pmcallocate(pmc);
2436
2437 PMCDBG(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p",
2438 po, pmc);
2439
2440 return 0;
2441}
2442
2443/*
2444 * Return the current row disposition:
2445 * == 0 => FREE
2446 * > 0 => PROCESS MODE
2447 * < 0 => SYSTEM MODE
2448 */
2449
2450int
2451pmc_getrowdisp(int ri)
2452{
2453 return pmc_pmcdisp[ri];
2454}
2455
2456/*
2457 * Check if a PMC at row index 'ri' can be allocated to the current
2458 * process.
2459 *
2460 * Allocation can fail if:
2461 * - the current process is already being profiled by a PMC at index 'ri',
2462 * attached to it via OP_PMCATTACH.
2463 * - the current process has already allocated a PMC at index 'ri'
2464 * via OP_ALLOCATE.
2465 */
2466
2467static int
2468pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu)
2469{
2470 enum pmc_mode mode;
2471 struct pmc *pm;
2472 struct pmc_owner *po;
2473 struct pmc_process *pp;
2474
2475 PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d "
2476 "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu);
2477
2478 /*
2479 * We shouldn't have already allocated a process-mode PMC at
2480 * row index 'ri'.
2481 *
2482 * We shouldn't have allocated a system-wide PMC on the same
2483 * CPU and same RI.
2484 */
2485 if ((po = pmc_find_owner_descriptor(p)) != NULL)
2486 LIST_FOREACH(pm, &po->po_pmcs, pm_next) {
2487 if (PMC_TO_ROWINDEX(pm) == ri) {
2488 mode = PMC_TO_MODE(pm);
2489 if (PMC_IS_VIRTUAL_MODE(mode))
2490 return EEXIST;
2491 if (PMC_IS_SYSTEM_MODE(mode) &&
2492 (int) PMC_TO_CPU(pm) == cpu)
2493 return EEXIST;
2494 }
2495 }
2496
2497 /*
2498 * We also shouldn't be the target of any PMC at this index
2499 * since otherwise a PMC_ATTACH to ourselves will fail.
2500 */
2501 if ((pp = pmc_find_process_descriptor(p, 0)) != NULL)
2502 if (pp->pp_pmcs[ri].pp_pmc)
2503 return EEXIST;
2504
2505 PMCDBG(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok",
2506 p, p->p_pid, p->p_comm, ri);
2507
2508 return 0;
2509}
2510
2511/*
2512 * Check if a given PMC at row index 'ri' can be currently used in
2513 * mode 'mode'.
2514 */
2515
2516static int
2517pmc_can_allocate_row(int ri, enum pmc_mode mode)
2518{
2519 enum pmc_disp disp;
2520
2521 sx_assert(&pmc_sx, SX_XLOCKED);
2522
2523 PMCDBG(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode);
2524
2525 if (PMC_IS_SYSTEM_MODE(mode))
2526 disp = PMC_DISP_STANDALONE;
2527 else
2528 disp = PMC_DISP_THREAD;
2529
2530 /*
2531 * check disposition for PMC row 'ri':
2532 *
2533 * Expected disposition Row-disposition Result
2534 *
2535 * STANDALONE STANDALONE or FREE proceed
2536 * STANDALONE THREAD fail
2537 * THREAD THREAD or FREE proceed
2538 * THREAD STANDALONE fail
2539 */
2540
2541 if (!PMC_ROW_DISP_IS_FREE(ri) &&
2542 !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) &&
2543 !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri)))
2544 return EBUSY;
2545
2546 /*
2547 * All OK
2548 */
2549
2550 PMCDBG(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode);
2551
2552 return 0;
2553
2554}
2555
2556/*
 2557 * Find a PMC descriptor with user handle 'pmcid' for owner 'po'.
2558 */
2559
2560static struct pmc *
2561pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid)
2562{
2563 struct pmc *pm;
2564
2565 KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc,
2566 ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__,
2567 PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc));
2568
2569 LIST_FOREACH(pm, &po->po_pmcs, pm_next)
2570 if (pm->pm_id == pmcid)
2571 return pm;
2572
2573 return NULL;
2574}
2575
2576static int
2577pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc)
2578{
2579
2580 struct pmc *pm;
2581 struct pmc_owner *po;
2582
2583 PMCDBG(PMC,FND,1, "find-pmc id=%d", pmcid);
2584
2585 if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL)
2586 return ESRCH;
2587
2588 if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL)
2589 return EINVAL;
2590
2591 PMCDBG(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm);
2592
2593 *pmc = pm;
2594 return 0;
2595}
2596
2597/*
2598 * Start a PMC.
2599 */
2600
2601static int
2602pmc_start(struct pmc *pm)
2603{
2604 enum pmc_mode mode;
2605 struct pmc_owner *po;
2606 struct pmc_binding pb;
2607 struct pmc_classdep *pcd;
2608 int adjri, error, cpu, ri;
2609
2610 KASSERT(pm != NULL,
2611 ("[pmc,%d] null pm", __LINE__));
2612
2613 mode = PMC_TO_MODE(pm);
2614 ri = PMC_TO_ROWINDEX(pm);
2615 pcd = pmc_ri_to_classdep(md, ri, &adjri);
2616
2617 error = 0;
2618
2619 PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri);
2620
2621 po = pm->pm_owner;
2622
2623 /*
2624 * Disallow PMCSTART if a logfile is required but has not been
2625 * configured yet.
2626 */
2627 if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
2628 (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
2629 return (EDOOFUS); /* programming error */
2630
2631 /*
2632 * If this is a sampling mode PMC, log mapping information for
2633 * the kernel modules that are currently loaded.
2634 */
2635 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
2636 pmc_log_kernel_mappings(pm);
2637
2638 if (PMC_IS_VIRTUAL_MODE(mode)) {
2639
2640 /*
2641 * If a PMCATTACH has never been done on this PMC,
2642 * attach it to its owner process.
2643 */
2644
2645 if (LIST_EMPTY(&pm->pm_targets))
2646 error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH :
2647 pmc_attach_process(po->po_owner, pm);
2648
2649 /*
2650 * If the PMC is attached to its owner, then force a context
2651 * switch to ensure that the MD state gets set correctly.
2652 */
2653
2654 if (error == 0) {
2655 pm->pm_state = PMC_STATE_RUNNING;
2656 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER)
2657 pmc_force_context_switch();
2658 }
2659
2660 return (error);
2661 }
2662
2663
2664 /*
2665 * A system-wide PMC.
2666 *
2667 * Add the owner to the global list if this is a system-wide
2668 * sampling PMC.
2669 */
2670
2671 if (mode == PMC_MODE_SS) {
2672 if (po->po_sscount == 0) {
2673 LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
2674 atomic_add_rel_int(&pmc_ss_count, 1);
2675 PMCDBG(PMC,OPS,1, "po=%p in global list", po);
2676 }
2677 po->po_sscount++;
2678
2679 /*
2680 * Log mapping information for all existing processes in the
2681 * system. Subsequent mappings are logged as they happen;
2682 * see pmc_process_mmap().
2683 */
2684 if (po->po_logprocmaps == 0) {
2685 pmc_log_all_process_mappings(po);
2686 po->po_logprocmaps = 1;
2687 }
2688 }
2689
2690 /*
2691 * Move to the CPU associated with this
2692 * PMC, and start the hardware.
2693 */
2694
2695 pmc_save_cpu_binding(&pb);
2696
2697 cpu = PMC_TO_CPU(pm);
2698
2699 if (!pmc_cpu_is_active(cpu))
2700 return (ENXIO);
2701
2702 pmc_select_cpu(cpu);
2703
2704 /*
2705 * global PMCs are configured at allocation time
2706 * so write out the initial value and start the PMC.
2707 */
2708
2709 pm->pm_state = PMC_STATE_RUNNING;
2710
2711 critical_enter();
2712 if ((error = pcd->pcd_write_pmc(cpu, adjri,
2713 PMC_IS_SAMPLING_MODE(mode) ?
2714 pm->pm_sc.pm_reloadcount :
2715 pm->pm_sc.pm_initial)) == 0)
2716 error = pcd->pcd_start_pmc(cpu, adjri);
2717 critical_exit();
2718
2719 pmc_restore_cpu_binding(&pb);
2720
2721 return (error);
2722}
2723
2724/*
2725 * Stop a PMC.
2726 */
2727
2728static int
2729pmc_stop(struct pmc *pm)
2730{
2731 struct pmc_owner *po;
2732 struct pmc_binding pb;
2733 struct pmc_classdep *pcd;
2734 int adjri, cpu, error, ri;
2735
2736 KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__));
2737
2738 PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm,
2739 PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm));
2740
2741 pm->pm_state = PMC_STATE_STOPPED;
2742
2743 /*
2744 * If the PMC is a virtual mode one, changing the state to
2745 * non-RUNNING is enough to ensure that the PMC never gets
2746 * scheduled.
2747 *
 2748 * If this PMC is currently running on a CPU, then it will be
2749 * handled correctly at the time its target process is context
2750 * switched out.
2751 */
2752
2753 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
2754 return 0;
2755
2756 /*
2757 * A system-mode PMC. Move to the CPU associated with
2758 * this PMC, and stop the hardware. We update the
2759 * 'initial count' so that a subsequent PMCSTART will
2760 * resume counting from the current hardware count.
2761 */
2762
2763 pmc_save_cpu_binding(&pb);
2764
2765 cpu = PMC_TO_CPU(pm);
2766
2767 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
2768 ("[pmc,%d] illegal cpu=%d", __LINE__, cpu));
2769
2770 if (!pmc_cpu_is_active(cpu))
2771 return ENXIO;
2772
2773 pmc_select_cpu(cpu);
2774
2775 ri = PMC_TO_ROWINDEX(pm);
2776 pcd = pmc_ri_to_classdep(md, ri, &adjri);
2777
2778 critical_enter();
2779 if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0)
2780 error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial);
2781 critical_exit();
2782
2783 pmc_restore_cpu_binding(&pb);
2784
2785 po = pm->pm_owner;
2786
2787 /* remove this owner from the global list of SS PMC owners */
2788 if (PMC_TO_MODE(pm) == PMC_MODE_SS) {
2789 po->po_sscount--;
2790 if (po->po_sscount == 0) {
2791 atomic_subtract_rel_int(&pmc_ss_count, 1);
2792 LIST_REMOVE(po, po_ssnext);
2793 PMCDBG(PMC,OPS,2,"po=%p removed from global list", po);
2794 }
2795 }
2796
2797 return (error);
2798}
2799
2800
2801#ifdef DEBUG
2802static const char *pmc_op_to_name[] = {
2803#undef __PMC_OP
2804#define __PMC_OP(N, D) #N ,
2805 __PMC_OPS()
2806 NULL
2807};
2808#endif
2809
2810/*
2811 * The syscall interface
2812 */
2813
2814#define PMC_GET_SX_XLOCK(...) do { \
2815 sx_xlock(&pmc_sx); \
2816 if (pmc_hook == NULL) { \
2817 sx_xunlock(&pmc_sx); \
2818 return __VA_ARGS__; \
2819 } \
2820} while (0)
2821
2822#define PMC_DOWNGRADE_SX() do { \
2823 sx_downgrade(&pmc_sx); \
2824 is_sx_downgraded = 1; \
2825} while (0)
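
/*
 * PMC_GET_SX_XLOCK() takes the module-wide sx lock exclusively and
 * bails out with the supplied return value if the module has been
 * unconfigured (pmc_hook == NULL).  PMC_DOWNGRADE_SX() converts the
 * hold to a shared lock for read-mostly operations and records the
 * fact in 'is_sx_downgraded' so the handler's exit path (outside this
 * excerpt) can release whichever lock is actually held.
 */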
2826
2827static int
2828pmc_syscall_handler(struct thread *td, void *syscall_args)
2829{
2830 int error, is_sx_downgraded, is_sx_locked, op;
2831 struct pmc_syscall_args *c;
2832 void *arg;
2833
2834 PMC_GET_SX_XLOCK(ENOSYS);
2835
2836 DROP_GIANT();
2837
2838 is_sx_downgraded = 0;
2839 is_sx_locked = 1;
2840
2841 c = (struct pmc_syscall_args *) syscall_args;
2842
2843 op = c->pmop_code;
2844 arg = c->pmop_data;
2845
2846 PMCDBG(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op,
2847 pmc_op_to_name[op], arg);
2848
2849 error = 0;
2850 atomic_add_int(&pmc_stats.pm_syscalls, 1);
2851
2852 switch(op)
2853 {
2854
2855
2856 /*
2857 * Configure a log file.
2858 *
2859 * XXX This OP will be reworked.
2860 */
2861
2862 case PMC_OP_CONFIGURELOG:
2863 {
2864 struct proc *p;
2865 struct pmc *pm;
2866 struct pmc_owner *po;
2867 struct pmc_op_configurelog cl;
2868
2869 sx_assert(&pmc_sx, SX_XLOCKED);
2870
2871 if ((error = copyin(arg, &cl, sizeof(cl))) != 0)
2872 break;
2873
2874 /* mark this process as owning a log file */
2875 p = td->td_proc;
2876 if ((po = pmc_find_owner_descriptor(p)) == NULL)
2877 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) {
2878 error = ENOMEM;
2879 break;
2880 }
2881
2882 /*
2883 * If a valid fd was passed in, try to configure that,
2884 * otherwise if 'fd' was less than zero and there was
2885 * a log file configured, flush its buffers and
2886 * de-configure it.
2887 */
2888 if (cl.pm_logfd >= 0) {
2889 sx_xunlock(&pmc_sx);
2890 is_sx_locked = 0;
2891 error = pmclog_configure_log(md, po, cl.pm_logfd);
2892 } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) {
2893 pmclog_process_closelog(po);
2894 error = pmclog_flush(po);
2894 error = pmclog_close(po);
2895 if (error == 0) {
2896 LIST_FOREACH(pm, &po->po_pmcs, pm_next)
2897 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE &&
2898 pm->pm_state == PMC_STATE_RUNNING)
2899 pmc_stop(pm);
2900 error = pmclog_deconfigure_log(po);
2901 }
2902 } else
2903 error = EINVAL;
2904
2905 if (error)
2906 break;
2907 }
2908 break;
2909
2910
2911 /*
2912 * Flush a log file.
2913 */
2914
2915 case PMC_OP_FLUSHLOG:
2916 {
2917 struct pmc_owner *po;
2918
2919 sx_assert(&pmc_sx, SX_XLOCKED);
2920
2921 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
2922 error = EINVAL;
2923 break;
2924 }
2925
2926 error = pmclog_flush(po);
2927 }
2928 break;
2929
2929 /*
2930 * Close a log file.
2931 */
2932
2933 case PMC_OP_CLOSELOG:
2934 {
2935 struct pmc_owner *po;
2936
2937 sx_assert(&pmc_sx, SX_XLOCKED);
2938
2939 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
2940 error = EINVAL;
2941 break;
2942 }
2943
2944 error = pmclog_close(po);
2945 }
2946 break;
2947
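 /*
 * Illustrative userland sketch (not part of this file): the three log
 * operations above are normally reached through libpmc(3) wrappers
 * rather than by issuing PMC_OP_* requests directly.  Wrapper names
 * below are assumed to mirror the operations:
 *
 *	int fd = open("samples.out", O_WRONLY|O_CREAT|O_TRUNC, 0600);
 *	(void) pmc_configure_logfile(fd);	PMC_OP_CONFIGURELOG
 *	... allocate and start PMCs, run the workload ...
 *	(void) pmc_flush_logfile();		PMC_OP_FLUSHLOG
 *	(void) pmc_close_logfile();		PMC_OP_CLOSELOG
 */
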
2948 /*
2931 * Retrieve hardware configuration.
2932 */
2933
2934 case PMC_OP_GETCPUINFO: /* CPU information */
2935 {
2936 struct pmc_op_getcpuinfo gci;
2937 struct pmc_classinfo *pci;
2938 struct pmc_classdep *pcd;
2939 int cl;
2940
2941 gci.pm_cputype = md->pmd_cputype;
2942 gci.pm_ncpu = pmc_cpu_max();
2943 gci.pm_npmc = md->pmd_npmc;
2944 gci.pm_nclass = md->pmd_nclass;
2945 pci = gci.pm_classes;
2946 pcd = md->pmd_classdep;
2947 for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) {
2948 pci->pm_caps = pcd->pcd_caps;
2949 pci->pm_class = pcd->pcd_class;
2950 pci->pm_width = pcd->pcd_width;
2951 pci->pm_num = pcd->pcd_num;
2952 }
2953 error = copyout(&gci, arg, sizeof(gci));
2954 }
2955 break;
2956
2957
2958 /*
2959 * Get module statistics
2960 */
2961
2962 case PMC_OP_GETDRIVERSTATS:
2963 {
2964 struct pmc_op_getdriverstats gms;
2965
2966 bcopy(&pmc_stats, &gms, sizeof(gms));
2967 error = copyout(&gms, arg, sizeof(gms));
2968 }
2969 break;
2970
2971
2972 /*
2973 * Retrieve module version number
2974 */
2975
2976 case PMC_OP_GETMODULEVERSION:
2977 {
2978 uint32_t cv, modv;
2979
2980 /* retrieve the client's idea of the ABI version */
2981 if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0)
2982 break;
2983 /* don't service clients newer than our driver */
2984 modv = PMC_VERSION;
2985 if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) {
2986 error = EPROGMISMATCH;
2987 break;
2988 }
2989 error = copyout(&modv, arg, sizeof(int));
2990 }
2991 break;
2992
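 /*
 * In the version check above only the upper 16 bits take part in the
 * comparison, so clients that differ from the driver solely in the
 * low-order bits (the patch level, going by the PMC_VERSION encoding
 * in <sys/pmc.h>) are still serviced; a client built against a newer
 * ABI than the driver gets EPROGMISMATCH.
 */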
2993
2994 /*
2995 * Retrieve the state of all the PMCs on a given
2996 * CPU.
2997 */
2998
2999 case PMC_OP_GETPMCINFO:
3000 {
3001 int ari;
3002 struct pmc *pm;
3003 size_t pmcinfo_size;
3004 uint32_t cpu, n, npmc;
3005 struct pmc_owner *po;
3006 struct pmc_binding pb;
3007 struct pmc_classdep *pcd;
3008 struct pmc_info *p, *pmcinfo;
3009 struct pmc_op_getpmcinfo *gpi;
3010
3011 PMC_DOWNGRADE_SX();
3012
3013 gpi = (struct pmc_op_getpmcinfo *) arg;
3014
3015 if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0)
3016 break;
3017
3018 if (cpu >= pmc_cpu_max()) {
3019 error = EINVAL;
3020 break;
3021 }
3022
3023 if (!pmc_cpu_is_active(cpu)) {
3024 error = ENXIO;
3025 break;
3026 }
3027
3028 /* switch to CPU 'cpu' */
3029 pmc_save_cpu_binding(&pb);
3030 pmc_select_cpu(cpu);
3031
3032 npmc = md->pmd_npmc;
3033
3034 pmcinfo_size = npmc * sizeof(struct pmc_info);
3035 pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK);
3036
3037 p = pmcinfo;
3038
3039 for (n = 0; n < md->pmd_npmc; n++, p++) {
3040
3041 pcd = pmc_ri_to_classdep(md, n, &ari);
3042
3043 KASSERT(pcd != NULL,
3044 ("[pmc,%d] null pcd ri=%d", __LINE__, n));
3045
3046 if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0)
3047 break;
3048
3049 if (PMC_ROW_DISP_IS_STANDALONE(n))
3050 p->pm_rowdisp = PMC_DISP_STANDALONE;
3051 else if (PMC_ROW_DISP_IS_THREAD(n))
3052 p->pm_rowdisp = PMC_DISP_THREAD;
3053 else
3054 p->pm_rowdisp = PMC_DISP_FREE;
3055
3056 p->pm_ownerpid = -1;
3057
3058 if (pm == NULL) /* no PMC associated */
3059 continue;
3060
3061 po = pm->pm_owner;
3062
3063 KASSERT(po->po_owner != NULL,
3064 ("[pmc,%d] pmc_owner had a null proc pointer",
3065 __LINE__));
3066
3067 p->pm_ownerpid = po->po_owner->p_pid;
3068 p->pm_mode = PMC_TO_MODE(pm);
3069 p->pm_event = pm->pm_event;
3070 p->pm_flags = pm->pm_flags;
3071
3072 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
3073 p->pm_reloadcount =
3074 pm->pm_sc.pm_reloadcount;
3075 }
3076
3077 pmc_restore_cpu_binding(&pb);
3078
3079 /* now copy out the PMC info collected */
3080 if (error == 0)
3081 error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size);
3082
3083 free(pmcinfo, M_PMC);
3084 }
3085 break;
3086
3087
3088 /*
3089 * Set the administrative state of a PMC. I.e. whether
3090 * the PMC is to be used or not.
3091 */
3092
3093 case PMC_OP_PMCADMIN:
3094 {
3095 int cpu, ri;
3096 enum pmc_state request;
3097 struct pmc_cpu *pc;
3098 struct pmc_hw *phw;
3099 struct pmc_op_pmcadmin pma;
3100 struct pmc_binding pb;
3101
3102 sx_assert(&pmc_sx, SX_XLOCKED);
3103
3104 KASSERT(td == curthread,
3105 ("[pmc,%d] td != curthread", __LINE__));
3106
3107 error = priv_check(td, PRIV_PMC_MANAGE);
3108 if (error)
3109 break;
3110
3111 if ((error = copyin(arg, &pma, sizeof(pma))) != 0)
3112 break;
3113
3114 cpu = pma.pm_cpu;
3115
3116 if (cpu < 0 || cpu >= (int) pmc_cpu_max()) {
3117 error = EINVAL;
3118 break;
3119 }
3120
3121 if (!pmc_cpu_is_active(cpu)) {
3122 error = ENXIO;
3123 break;
3124 }
3125
3126 request = pma.pm_state;
3127
3128 if (request != PMC_STATE_DISABLED &&
3129 request != PMC_STATE_FREE) {
3130 error = EINVAL;
3131 break;
3132 }
3133
3134 ri = pma.pm_pmc; /* pmc id == row index */
3135 if (ri < 0 || ri >= (int) md->pmd_npmc) {
3136 error = EINVAL;
3137 break;
3138 }
3139
3140 /*
3141 * We can't disable a PMC with a row-index allocated
3142 * for process virtual PMCs.
3143 */
3144
3145 if (PMC_ROW_DISP_IS_THREAD(ri) &&
3146 request == PMC_STATE_DISABLED) {
3147 error = EBUSY;
3148 break;
3149 }
3150
3151 /*
3152 * otherwise, this PMC on this CPU is either free or
3153 * in system-wide mode.
3154 */
3155
3156 pmc_save_cpu_binding(&pb);
3157 pmc_select_cpu(cpu);
3158
3159 pc = pmc_pcpu[cpu];
3160 phw = pc->pc_hwpmcs[ri];
3161
3162 /*
3163 * XXX do we need some kind of 'forced' disable?
3164 */
3165
3166 if (phw->phw_pmc == NULL) {
3167 if (request == PMC_STATE_DISABLED &&
3168 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) {
3169 phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED;
3170 PMC_MARK_ROW_STANDALONE(ri);
3171 } else if (request == PMC_STATE_FREE &&
3172 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) {
3173 phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED;
3174 PMC_UNMARK_ROW_STANDALONE(ri);
3175 }
3176 /* other cases are a no-op */
3177 } else
3178 error = EBUSY;
3179
3180 pmc_restore_cpu_binding(&pb);
3181 }
3182 break;
3183
3184
3185 /*
3186 * Allocate a PMC.
3187 */
3188
3189 case PMC_OP_PMCALLOCATE:
3190 {
3191 int adjri, n;
3192 u_int cpu;
3193 uint32_t caps;
3194 struct pmc *pmc;
3195 enum pmc_mode mode;
3196 struct pmc_hw *phw;
3197 struct pmc_binding pb;
3198 struct pmc_classdep *pcd;
3199 struct pmc_op_pmcallocate pa;
3200
3201 if ((error = copyin(arg, &pa, sizeof(pa))) != 0)
3202 break;
3203
3204 caps = pa.pm_caps;
3205 mode = pa.pm_mode;
3206 cpu = pa.pm_cpu;
3207
3208 if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC &&
3209 mode != PMC_MODE_TS && mode != PMC_MODE_TC) ||
3210 (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) {
3211 error = EINVAL;
3212 break;
3213 }
3214
3215 /*
3216 * Virtual PMCs should only ask for a default CPU.
3217 * System mode PMCs need to specify a non-default CPU.
3218 */
3219
3220 if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) ||
3221 (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) {
3222 error = EINVAL;
3223 break;
3224 }
3225
3226 /*
3227 * Check that an inactive CPU is not being asked for.
3228 */
3229
3230 if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) {
3231 error = ENXIO;
3232 break;
3233 }
3234
3235 /*
3236 * Refuse an allocation for a system-wide PMC if this
3237 * process has been jailed, or if this process lacks
3238 * super-user credentials and the sysctl tunable
3239 * 'security.bsd.unprivileged_syspmcs' is zero.
3240 */
3241
3242 if (PMC_IS_SYSTEM_MODE(mode)) {
3243 if (jailed(curthread->td_ucred)) {
3244 error = EPERM;
3245 break;
3246 }
3247 if (!pmc_unprivileged_syspmcs) {
3248 error = priv_check(curthread,
3249 PRIV_PMC_SYSTEM);
3250 if (error)
3251 break;
3252 }
3253 }
3254
3255 /*
3256 * Look for valid values for 'pm_flags'
3257 */
3258
3259 if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
3260 PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) {
3261 error = EINVAL;
3262 break;
3263 }
3264
3265 /* process logging options are not allowed for system PMCs */
3266 if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags &
3267 (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) {
3268 error = EINVAL;
3269 break;
3270 }
3271
3272 /*
3273 * All sampling mode PMCs need to be able to interrupt the
3274 * CPU.
3275 */
3276 if (PMC_IS_SAMPLING_MODE(mode))
3277 caps |= PMC_CAP_INTERRUPT;
3278
3279 /* A valid class specifier should have been passed in. */
3280 for (n = 0; n < md->pmd_nclass; n++)
3281 if (md->pmd_classdep[n].pcd_class == pa.pm_class)
3282 break;
3283 if (n == md->pmd_nclass) {
3284 error = EINVAL;
3285 break;
3286 }
3287
3288 /* The requested PMC capabilities should be feasible. */
3289 if ((md->pmd_classdep[n].pcd_caps & caps) != caps) {
3290 error = EOPNOTSUPP;
3291 break;
3292 }
3293
3294 PMCDBG(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d",
3295 pa.pm_ev, caps, mode, cpu);
3296
3297 pmc = pmc_allocate_pmc_descriptor();
3298 pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class,
3299 PMC_ID_INVALID);
3300 pmc->pm_event = pa.pm_ev;
3301 pmc->pm_state = PMC_STATE_FREE;
3302 pmc->pm_caps = caps;
3303 pmc->pm_flags = pa.pm_flags;
3304
3305 /* switch thread to CPU 'cpu' */
3306 pmc_save_cpu_binding(&pb);
3307
3308#define PMC_IS_SHAREABLE_PMC(cpu, n) \
3309 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \
3310 PMC_PHW_FLAG_IS_SHAREABLE)
3311#define PMC_IS_UNALLOCATED(cpu, n) \
3312 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL)
3313
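		/*
		 * Scan all row indices for a hardware PMC that can satisfy
		 * this request.  System mode PMCs are allocated on the
		 * requested CPU; process virtual PMCs on the CPU the
		 * current thread is running on.
		 */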
3314 if (PMC_IS_SYSTEM_MODE(mode)) {
3315 pmc_select_cpu(cpu);
3316 for (n = 0; n < (int) md->pmd_npmc; n++) {
3317 pcd = pmc_ri_to_classdep(md, n, &adjri);
3318 if (pmc_can_allocate_row(n, mode) == 0 &&
3319 pmc_can_allocate_rowindex(
3320 curthread->td_proc, n, cpu) == 0 &&
3321 (PMC_IS_UNALLOCATED(cpu, n) ||
3322 PMC_IS_SHAREABLE_PMC(cpu, n)) &&
3323 pcd->pcd_allocate_pmc(cpu, adjri, pmc,
3324 &pa) == 0)
3325 break;
3326 }
3327 } else {
3328 /* Process virtual mode */
3329 for (n = 0; n < (int) md->pmd_npmc; n++) {
3330 pcd = pmc_ri_to_classdep(md, n, &adjri);
3331 if (pmc_can_allocate_row(n, mode) == 0 &&
3332 pmc_can_allocate_rowindex(
3333 curthread->td_proc, n,
3334 PMC_CPU_ANY) == 0 &&
3335 pcd->pcd_allocate_pmc(curthread->td_oncpu,
3336 adjri, pmc, &pa) == 0)
3337 break;
3338 }
3339 }
3340
3341#undef PMC_IS_UNALLOCATED
3342#undef PMC_IS_SHAREABLE_PMC
3343
3344 pmc_restore_cpu_binding(&pb);
3345
3346 if (n == (int) md->pmd_npmc) {
3347 pmc_destroy_pmc_descriptor(pmc);
3348 free(pmc, M_PMC);
3349 pmc = NULL;
3350 error = EINVAL;
3351 break;
3352 }
3353
3354 /* Fill in the correct value in the ID field */
3355 pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n);
3356
3357 PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x",
3358 pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id);
3359
3360 /* Process mode PMCs with logging enabled need log files */
3361 if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW))
3362 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE;
3363
3364 /* All system mode sampling PMCs require a log file */
3365 if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode))
3366 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE;
3367
3368 /*
3369 		 * Configure global PMCs immediately
3370 */
3371
3372 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) {
3373
3374 pmc_save_cpu_binding(&pb);
3375 pmc_select_cpu(cpu);
3376
3377 phw = pmc_pcpu[cpu]->pc_hwpmcs[n];
3378 pcd = pmc_ri_to_classdep(md, n, &adjri);
3379
3380 if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 ||
3381 (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) {
3382 (void) pcd->pcd_release_pmc(cpu, adjri, pmc);
3383 pmc_destroy_pmc_descriptor(pmc);
3384 free(pmc, M_PMC);
3385 pmc = NULL;
3386 pmc_restore_cpu_binding(&pb);
3387 error = EPERM;
3388 break;
3389 }
3390
3391 pmc_restore_cpu_binding(&pb);
3392 }
3393
3394 pmc->pm_state = PMC_STATE_ALLOCATED;
3395
3396 /*
3397 * mark row disposition
3398 */
3399
3400 if (PMC_IS_SYSTEM_MODE(mode))
3401 PMC_MARK_ROW_STANDALONE(n);
3402 else
3403 PMC_MARK_ROW_THREAD(n);
3404
3405 /*
3406 * Register this PMC with the current thread as its owner.
3407 */
3408
3409 if ((error =
3410 pmc_register_owner(curthread->td_proc, pmc)) != 0) {
3411 pmc_release_pmc_descriptor(pmc);
3412 free(pmc, M_PMC);
3413 pmc = NULL;
3414 break;
3415 }
3416
3417 /*
3418 		 * Return the allocated PMC id to the caller.
3419 */
3420
3421 pa.pm_pmcid = pmc->pm_id;
3422
3423 error = copyout(&pa, arg, sizeof(pa));
3424 }
3425 break;
3426
3427
3428 /*
3429 * Attach a PMC to a process.
3430 */
3431
3432 case PMC_OP_PMCATTACH:
3433 {
3434 struct pmc *pm;
3435 struct proc *p;
3436 struct pmc_op_pmcattach a;
3437
3438 sx_assert(&pmc_sx, SX_XLOCKED);
3439
3440 if ((error = copyin(arg, &a, sizeof(a))) != 0)
3441 break;
3442
3443 if (a.pm_pid < 0) {
3444 error = EINVAL;
3445 break;
3446 } else if (a.pm_pid == 0)
3447 a.pm_pid = td->td_proc->p_pid;
3448
3449 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0)
3450 break;
3451
3452 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
3453 error = EINVAL;
3454 break;
3455 }
3456
3457 /* PMCs may be (re)attached only when allocated or stopped */
3458 if (pm->pm_state == PMC_STATE_RUNNING) {
3459 error = EBUSY;
3460 break;
3461 } else if (pm->pm_state != PMC_STATE_ALLOCATED &&
3462 pm->pm_state != PMC_STATE_STOPPED) {
3463 error = EINVAL;
3464 break;
3465 }
3466
3467 /* lookup pid */
3468 if ((p = pfind(a.pm_pid)) == NULL) {
3469 error = ESRCH;
3470 break;
3471 }
3472
3473 /*
3474 * Ignore processes that are working on exiting.
3475 */
3476 if (p->p_flag & P_WEXIT) {
3477 error = ESRCH;
3478 PROC_UNLOCK(p); /* pfind() returns a locked process */
3479 break;
3480 }
3481
3482 /*
3483 * we are allowed to attach a PMC to a process if
3484 * we can debug it.
3485 */
3486 error = p_candebug(curthread, p);
3487
3488 PROC_UNLOCK(p);
3489
3490 if (error == 0)
3491 error = pmc_attach_process(p, pm);
3492 }
3493 break;
3494
3495
3496 /*
3497 * Detach an attached PMC from a process.
3498 */
3499
3500 case PMC_OP_PMCDETACH:
3501 {
3502 struct pmc *pm;
3503 struct proc *p;
3504 struct pmc_op_pmcattach a;
3505
3506 if ((error = copyin(arg, &a, sizeof(a))) != 0)
3507 break;
3508
3509 if (a.pm_pid < 0) {
3510 error = EINVAL;
3511 break;
3512 } else if (a.pm_pid == 0)
3513 a.pm_pid = td->td_proc->p_pid;
3514
3515 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0)
3516 break;
3517
3518 if ((p = pfind(a.pm_pid)) == NULL) {
3519 error = ESRCH;
3520 break;
3521 }
3522
3523 /*
3524 * Treat processes that are in the process of exiting
3525 * as if they were not present.
3526 */
3527
3528 if (p->p_flag & P_WEXIT)
3529 error = ESRCH;
3530
3531 PROC_UNLOCK(p); /* pfind() returns a locked process */
3532
3533 if (error == 0)
3534 error = pmc_detach_process(p, pm);
3535 }
3536 break;
3537
3538
3539 /*
3540 * Retrieve the MSR number associated with the counter
3541 * 'pmc_id'. This allows processes to directly use RDPMC
3542 * instructions to read their PMCs, without the overhead of a
3543 * system call.
3544 */
3545
3546 case PMC_OP_PMCGETMSR:
3547 {
3548 int adjri, ri;
3549 struct pmc *pm;
3550 struct pmc_target *pt;
3551 struct pmc_op_getmsr gm;
3552 struct pmc_classdep *pcd;
3553
3554 PMC_DOWNGRADE_SX();
3555
3556 if ((error = copyin(arg, &gm, sizeof(gm))) != 0)
3557 break;
3558
3559 if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0)
3560 break;
3561
3562 /*
3563 * The allocated PMC has to be a process virtual PMC,
3564 * i.e., of type MODE_T[CS]. Global PMCs can only be
3565 * read using the PMCREAD operation since they may be
3566 * allocated on a different CPU than the one we could
3567 * be running on at the time of the RDPMC instruction.
3568 *
3569 * The GETMSR operation is not allowed for PMCs that
3570 * are inherited across processes.
3571 */
3572
3573 if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) ||
3574 (pm->pm_flags & PMC_F_DESCENDANTS)) {
3575 error = EINVAL;
3576 break;
3577 }
3578
3579 /*
3580 		 * It only makes sense to use an RDPMC (or its
3581 * equivalent instruction on non-x86 architectures) on
3582 * a process that has allocated and attached a PMC to
3583 * itself. Conversely the PMC is only allowed to have
3584 * one process attached to it -- its owner.
3585 */
3586
3587 if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL ||
3588 LIST_NEXT(pt, pt_next) != NULL ||
3589 pt->pt_process->pp_proc != pm->pm_owner->po_owner) {
3590 error = EINVAL;
3591 break;
3592 }
3593
3594 ri = PMC_TO_ROWINDEX(pm);
3595 pcd = pmc_ri_to_classdep(md, ri, &adjri);
3596
3597 /* PMC class has no 'GETMSR' support */
3598 if (pcd->pcd_get_msr == NULL) {
3599 error = ENOSYS;
3600 break;
3601 }
3602
3603 if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0)
3604 break;
3605
3606 if ((error = copyout(&gm, arg, sizeof(gm))) < 0)
3607 break;
3608
3609 /*
3610 * Mark our process as using MSRs. Update machine
3611 * state using a forced context switch.
3612 */
3613
3614 pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS;
3615 pmc_force_context_switch();
3616
3617 }
3618 break;
3619
3620 /*
3621 * Release an allocated PMC
3622 */
3623
3624 case PMC_OP_PMCRELEASE:
3625 {
3626 pmc_id_t pmcid;
3627 struct pmc *pm;
3628 struct pmc_owner *po;
3629 struct pmc_op_simple sp;
3630
3631 /*
3632 * Find PMC pointer for the named PMC.
3633 *
3634 * Use pmc_release_pmc_descriptor() to switch off the
3635 * PMC, remove all its target threads, and remove the
3636 * PMC from its owner's list.
3637 *
3638 * Remove the owner record if this is the last PMC
3639 * owned.
3640 *
3641 * Free up space.
3642 */
3643
3644 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
3645 break;
3646
3647 pmcid = sp.pm_pmcid;
3648
3649 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
3650 break;
3651
3652 po = pm->pm_owner;
3653 pmc_release_pmc_descriptor(pm);
3654 pmc_maybe_remove_owner(po);
3655
3656 free(pm, M_PMC);
3657 }
3658 break;
3659
3660
3661 /*
3662 * Read and/or write a PMC.
3663 */
3664
3665 case PMC_OP_PMCRW:
3666 {
3667 int adjri;
3668 struct pmc *pm;
3669 uint32_t cpu, ri;
3670 pmc_value_t oldvalue;
3671 struct pmc_binding pb;
3672 struct pmc_op_pmcrw prw;
3673 struct pmc_classdep *pcd;
3674 struct pmc_op_pmcrw *pprw;
3675
3676 PMC_DOWNGRADE_SX();
3677
3678 if ((error = copyin(arg, &prw, sizeof(prw))) != 0)
3679 break;
3680
3681 ri = 0;
3682 PMCDBG(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid,
3683 prw.pm_flags);
3684
3685 /* must have at least one flag set */
3686 if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) {
3687 error = EINVAL;
3688 break;
3689 }
3690
3691 /* locate pmc descriptor */
3692 if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0)
3693 break;
3694
3695 		/* The PMC must be in a readable state: allocated, stopped or running. */
3696 if (pm->pm_state != PMC_STATE_ALLOCATED &&
3697 pm->pm_state != PMC_STATE_STOPPED &&
3698 pm->pm_state != PMC_STATE_RUNNING) {
3699 error = EINVAL;
3700 break;
3701 }
3702
3703 /* writing a new value is allowed only for 'STOPPED' pmcs */
3704 if (pm->pm_state == PMC_STATE_RUNNING &&
3705 (prw.pm_flags & PMC_F_NEWVALUE)) {
3706 error = EBUSY;
3707 break;
3708 }
3709
3710 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
3711
3712 /*
3713 * If this PMC is attached to its owner (i.e.,
3714 * the process requesting this operation) and
3715 * is running, then attempt to get an
3716 			 * up-to-date reading from hardware for a READ.
3717 * Writes are only allowed when the PMC is
3718 * stopped, so only update the saved value
3719 * field.
3720 *
3721 * If the PMC is not running, or is not
3722 * attached to its owner, read/write to the
3723 * savedvalue field.
3724 */
3725
3726 ri = PMC_TO_ROWINDEX(pm);
3727 pcd = pmc_ri_to_classdep(md, ri, &adjri);
3728
3729 mtx_pool_lock_spin(pmc_mtxpool, pm);
3730 cpu = curthread->td_oncpu;
3731
3732 if (prw.pm_flags & PMC_F_OLDVALUE) {
3733 if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) &&
3734 (pm->pm_state == PMC_STATE_RUNNING))
3735 error = (*pcd->pcd_read_pmc)(cpu, adjri,
3736 &oldvalue);
3737 else
3738 oldvalue = pm->pm_gv.pm_savedvalue;
3739 }
3740 if (prw.pm_flags & PMC_F_NEWVALUE)
3741 pm->pm_gv.pm_savedvalue = prw.pm_value;
3742
3743 mtx_pool_unlock_spin(pmc_mtxpool, pm);
3744
3745 } else { /* System mode PMCs */
3746 cpu = PMC_TO_CPU(pm);
3747 ri = PMC_TO_ROWINDEX(pm);
3748 pcd = pmc_ri_to_classdep(md, ri, &adjri);
3749
3750 if (!pmc_cpu_is_active(cpu)) {
3751 error = ENXIO;
3752 break;
3753 }
3754
3755 /* move this thread to CPU 'cpu' */
3756 pmc_save_cpu_binding(&pb);
3757 pmc_select_cpu(cpu);
3758
3759 critical_enter();
3760 /* save old value */
3761 if (prw.pm_flags & PMC_F_OLDVALUE)
3762 if ((error = (*pcd->pcd_read_pmc)(cpu, adjri,
3763 &oldvalue)))
3764 goto error;
3765 /* write out new value */
3766 if (prw.pm_flags & PMC_F_NEWVALUE)
3767 error = (*pcd->pcd_write_pmc)(cpu, adjri,
3768 prw.pm_value);
3769 error:
3770 critical_exit();
3771 pmc_restore_cpu_binding(&pb);
3772 if (error)
3773 break;
3774 }
3775
3776 pprw = (struct pmc_op_pmcrw *) arg;
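		/*
		 * 'arg' points at the user's pmc_op_pmcrw structure; the old
		 * value (if requested) is copied out directly into its
		 * 'pm_value' field below.
		 */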
3777
3778#ifdef DEBUG
3779 if (prw.pm_flags & PMC_F_NEWVALUE)
3780 PMCDBG(PMC,OPS,2, "rw id=%d new %jx -> old %jx",
3781 ri, prw.pm_value, oldvalue);
3782 else if (prw.pm_flags & PMC_F_OLDVALUE)
3783 PMCDBG(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue);
3784#endif
3785
3786 /* return old value if requested */
3787 if (prw.pm_flags & PMC_F_OLDVALUE)
3788 if ((error = copyout(&oldvalue, &pprw->pm_value,
3789 sizeof(prw.pm_value))))
3790 break;
3791
3792 }
3793 break;
3794
3795
3796 /*
3797 * Set the sampling rate for a sampling mode PMC and the
3798 * initial count for a counting mode PMC.
3799 */
3800
3801 case PMC_OP_PMCSETCOUNT:
3802 {
3803 struct pmc *pm;
3804 struct pmc_op_pmcsetcount sc;
3805
3806 PMC_DOWNGRADE_SX();
3807
3808 if ((error = copyin(arg, &sc, sizeof(sc))) != 0)
3809 break;
3810
3811 if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0)
3812 break;
3813
3814 if (pm->pm_state == PMC_STATE_RUNNING) {
3815 error = EBUSY;
3816 break;
3817 }
3818
3819 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
3820 pm->pm_sc.pm_reloadcount = sc.pm_count;
3821 else
3822 pm->pm_sc.pm_initial = sc.pm_count;
3823 }
3824 break;
3825
3826
3827 /*
3828 * Start a PMC.
3829 */
3830
3831 case PMC_OP_PMCSTART:
3832 {
3833 pmc_id_t pmcid;
3834 struct pmc *pm;
3835 struct pmc_op_simple sp;
3836
3837 sx_assert(&pmc_sx, SX_XLOCKED);
3838
3839 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
3840 break;
3841
3842 pmcid = sp.pm_pmcid;
3843
3844 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
3845 break;
3846
3847 KASSERT(pmcid == pm->pm_id,
3848 ("[pmc,%d] pmcid %x != id %x", __LINE__,
3849 pm->pm_id, pmcid));
3850
3851 if (pm->pm_state == PMC_STATE_RUNNING) /* already running */
3852 break;
3853 else if (pm->pm_state != PMC_STATE_STOPPED &&
3854 pm->pm_state != PMC_STATE_ALLOCATED) {
3855 error = EINVAL;
3856 break;
3857 }
3858
3859 error = pmc_start(pm);
3860 }
3861 break;
3862
3863
3864 /*
3865 * Stop a PMC.
3866 */
3867
3868 case PMC_OP_PMCSTOP:
3869 {
3870 pmc_id_t pmcid;
3871 struct pmc *pm;
3872 struct pmc_op_simple sp;
3873
3874 PMC_DOWNGRADE_SX();
3875
3876 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
3877 break;
3878
3879 pmcid = sp.pm_pmcid;
3880
3881 /*
3882 * Mark the PMC as inactive and invoke the MD stop
3883 * routines if needed.
3884 */
3885
3886 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
3887 break;
3888
3889 KASSERT(pmcid == pm->pm_id,
3890 ("[pmc,%d] pmc id %x != pmcid %x", __LINE__,
3891 pm->pm_id, pmcid));
3892
3893 if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */
3894 break;
3895 else if (pm->pm_state != PMC_STATE_RUNNING) {
3896 error = EINVAL;
3897 break;
3898 }
3899
3900 error = pmc_stop(pm);
3901 }
3902 break;
3903
3904
3905 /*
3906 * Write a user supplied value to the log file.
3907 */
3908
3909 case PMC_OP_WRITELOG:
3910 {
3911 struct pmc_op_writelog wl;
3912 struct pmc_owner *po;
3913
3914 PMC_DOWNGRADE_SX();
3915
3916 if ((error = copyin(arg, &wl, sizeof(wl))) != 0)
3917 break;
3918
3919 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
3920 error = EINVAL;
3921 break;
3922 }
3923
3924 if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) {
3925 error = EINVAL;
3926 break;
3927 }
3928
3929 error = pmclog_process_userlog(po, &wl);
3930 }
3931 break;
3932
3933
3934 default:
3935 error = EINVAL;
3936 break;
3937 }
3938
3939 if (is_sx_locked != 0) {
3940 if (is_sx_downgraded)
3941 sx_sunlock(&pmc_sx);
3942 else
3943 sx_xunlock(&pmc_sx);
3944 }
3945
3946 if (error)
3947 atomic_add_int(&pmc_stats.pm_syscall_errors, 1);
3948
3949 PICKUP_GIANT();
3950
3951 return error;
3952}
3953
3954/*
3955 * Helper functions
3956 */
3957
3958
3959/*
3960 * Mark the thread as needing callchain capture and post an AST. The
3961 * actual callchain capture will be done in a context where it is safe
3962 * to take page faults.
3963 */
3964
3965static void
3966pmc_post_callchain_callback(void)
3967{
3968 struct thread *td;
3969
3970 td = curthread;
3971
3972 /*
3973 	 * If there are multiple PMCs for the same interrupt, ignore the new post
3974 */
3975 if (td->td_pflags & TDP_CALLCHAIN)
3976 return;
3977
3978 /*
3979 * Mark this thread as needing callchain capture.
3980 * `td->td_pflags' will be safe to touch because this thread
3981 * was in user space when it was interrupted.
3982 */
3983 td->td_pflags |= TDP_CALLCHAIN;
3984
3985 /*
3986 * Don't let this thread migrate between CPUs until callchain
3987 * capture completes.
3988 */
3989 sched_pin();
3990
3991 return;
3992}
3993
3994/*
3995 * Interrupt processing.
3996 *
3997 * Find a free slot in the per-cpu array of samples and capture the
3998 * current callchain there. If a sample was successfully added, a bit
3999 * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook
4000 * needs to be invoked from the clock handler.
4001 *
4002 * This function is meant to be called from an NMI handler. It cannot
4003 * use any of the locking primitives supplied by the OS.
4004 */
4005
4006int
4007pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf,
4008 int inuserspace)
4009{
4010 int error, callchaindepth;
4011 struct thread *td;
4012 struct pmc_sample *ps;
4013 struct pmc_samplebuffer *psb;
4014
4015 error = 0;
4016
4017 /*
4018 	 * Find the next free slot in this CPU's sample buffer.
4019 */
4020 psb = pmc_pcpu[cpu]->pc_sb;
4021
4022 ps = psb->ps_write;
4023 if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
4024 pm->pm_stalled = 1;
4025 atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
4026 PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
4027 cpu, pm, (void *) tf, inuserspace,
4028 (int) (psb->ps_write - psb->ps_samples),
4029 (int) (psb->ps_read - psb->ps_samples));
4030 error = ENOMEM;
4031 goto done;
4032 }
4033
4034
4035 /* Fill in entry. */
4036 PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
4037 (void *) tf, inuserspace,
4038 (int) (psb->ps_write - psb->ps_samples),
4039 (int) (psb->ps_read - psb->ps_samples));
4040
4041 KASSERT(pm->pm_runcount >= 0,
4042 ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm,
4043 pm->pm_runcount));
4044
4045 atomic_add_rel_int(&pm->pm_runcount, 1); /* hold onto PMC */
4046 ps->ps_pmc = pm;
4047 if ((td = curthread) && td->td_proc)
4048 ps->ps_pid = td->td_proc->p_pid;
4049 else
4050 ps->ps_pid = -1;
4051 ps->ps_cpu = cpu;
4052 ps->ps_td = td;
4053 ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
4054
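	/*
	 * Capture a full callchain only if the PMC asked for one;
	 * otherwise record just the interrupted PC.
	 */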
4055 callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
4056 pmc_callchaindepth : 1;
4057
4058 if (callchaindepth == 1)
4059 ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
4060 else {
4061 /*
4062 * Kernel stack traversals can be done immediately,
4063 * while we defer to an AST for user space traversals.
4064 */
4065 if (!inuserspace)
4066 callchaindepth =
4067 pmc_save_kernel_callchain(ps->ps_pc,
4068 callchaindepth, tf);
4069 else {
4070 pmc_post_callchain_callback();
4071 callchaindepth = PMC_SAMPLE_INUSE;
4072 }
4073 }
4074
4075 ps->ps_nsamples = callchaindepth; /* mark entry as in use */
4076
4077 /* increment write pointer, modulo ring buffer size */
4078 ps++;
4079 if (ps == psb->ps_fence)
4080 psb->ps_write = psb->ps_samples;
4081 else
4082 psb->ps_write = ps;
4083
4084 done:
4085 /* mark CPU as needing processing */
4086 CPU_SET_ATOMIC(cpu, &pmc_cpumask);
4087
4088 return (error);
4089}
4090
4091/*
4092 * Capture a user call chain. This function will be called from ast()
4093 * before control returns to userland and before the process gets
4094 * rescheduled.
4095 */
4096
4097static void
4098pmc_capture_user_callchain(int cpu, struct trapframe *tf)
4099{
4100 int i;
4101 struct pmc *pm;
4102 struct thread *td;
4103 struct pmc_sample *ps;
4104 struct pmc_samplebuffer *psb;
4105#ifdef INVARIANTS
4106 int ncallchains;
4107#endif
4108
4109 sched_unpin(); /* Can migrate safely now. */
4110
4111 psb = pmc_pcpu[cpu]->pc_sb;
4112 td = curthread;
4113
4114 KASSERT(td->td_pflags & TDP_CALLCHAIN,
4115 ("[pmc,%d] Retrieving callchain for thread that doesn't want it",
4116 __LINE__));
4117
4118#ifdef INVARIANTS
4119 ncallchains = 0;
4120#endif
4121
4122 /*
4123 * Iterate through all deferred callchain requests.
4124 */
4125
4126 ps = psb->ps_samples;
4127 for (i = 0; i < pmc_nsamples; i++, ps++) {
4128
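		/*
		 * Only process samples that are waiting for a user
		 * callchain and that were taken on behalf of the
		 * current thread.
		 */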
4129 if (ps->ps_nsamples != PMC_SAMPLE_INUSE)
4130 continue;
4131 if (ps->ps_td != td)
4132 continue;
4133
4134 KASSERT(ps->ps_cpu == cpu,
4135 ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__,
4136 ps->ps_cpu, PCPU_GET(cpuid)));
4137
4138 pm = ps->ps_pmc;
4139
4140 KASSERT(pm->pm_flags & PMC_F_CALLCHAIN,
4141 ("[pmc,%d] Retrieving callchain for PMC that doesn't "
4142 "want it", __LINE__));
4143
4144 KASSERT(pm->pm_runcount > 0,
4145 ("[pmc,%d] runcount %d", __LINE__, pm->pm_runcount));
4146
4147 /*
4148 * Retrieve the callchain and mark the sample buffer
4149 * as 'processable' by the timer tick sweep code.
4150 */
4151 ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc,
4152 pmc_callchaindepth, tf);
4153
4154#ifdef INVARIANTS
4155 ncallchains++;
4156#endif
4157
4158 }
4159
4160 KASSERT(ncallchains > 0,
4161 ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
4162 cpu));
4163
4164 return;
4165}
4166
4167
4168/*
4169 * Process saved PC samples.
4170 */
4171
4172static void
4173pmc_process_samples(int cpu)
4174{
4175 struct pmc *pm;
4176 int adjri, n;
4177 struct thread *td;
4178 struct pmc_owner *po;
4179 struct pmc_sample *ps;
4180 struct pmc_classdep *pcd;
4181 struct pmc_samplebuffer *psb;
4182
4183 KASSERT(PCPU_GET(cpuid) == cpu,
4184 ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__,
4185 PCPU_GET(cpuid), cpu));
4186
4187 psb = pmc_pcpu[cpu]->pc_sb;
4188
4189 for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
4190
4191 ps = psb->ps_read;
4192 if (ps->ps_nsamples == PMC_SAMPLE_FREE)
4193 break;
4194 if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
4195 /* Need a rescan at a later time. */
4196 CPU_SET_ATOMIC(cpu, &pmc_cpumask);
4197 break;
4198 }
4199
4200 pm = ps->ps_pmc;
4201
4202 KASSERT(pm->pm_runcount > 0,
4203 ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm,
4204 pm->pm_runcount));
4205
4206 po = pm->pm_owner;
4207
4208 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
4209 ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__,
4210 pm, PMC_TO_MODE(pm)));
4211
4212 /* Ignore PMCs that have been switched off */
4213 if (pm->pm_state != PMC_STATE_RUNNING)
4214 goto entrydone;
4215
4216 PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
4217 pm, ps->ps_nsamples, ps->ps_flags,
4218 (int) (psb->ps_write - psb->ps_samples),
4219 (int) (psb->ps_read - psb->ps_samples));
4220
4221 /*
4222 * If this is a process-mode PMC that is attached to
4223 * its owner, and if the PC is in user mode, update
4224 * profiling statistics like timer-based profiling
4225 * would have done.
4226 */
4227 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
4228 if (ps->ps_flags & PMC_CC_F_USERSPACE) {
4229 td = FIRST_THREAD_IN_PROC(po->po_owner);
4230 addupc_intr(td, ps->ps_pc[0], 1);
4231 }
4232 goto entrydone;
4233 }
4234
4235 /*
4236 * Otherwise, this is either a sampling mode PMC that
4237 * is attached to a different process than its owner,
4238 * or a system-wide sampling PMC. Dispatch a log
4239 * entry to the PMC's owner process.
4240 */
4241
4242 pmclog_process_callchain(pm, ps);
4243
4244 entrydone:
4245 ps->ps_nsamples = 0; /* mark entry as free */
4246 atomic_subtract_rel_int(&pm->pm_runcount, 1);
4247
4248 		/* increment read pointer, modulo ring buffer size */
4249 if (++ps == psb->ps_fence)
4250 psb->ps_read = psb->ps_samples;
4251 else
4252 psb->ps_read = ps;
4253 }
4254
4255 atomic_add_int(&pmc_stats.pm_log_sweeps, 1);
4256
4257 /* Do not re-enable stalled PMCs if we failed to process any samples */
4258 if (n == 0)
4259 return;
4260
4261 /*
4262 * Restart any stalled sampling PMCs on this CPU.
4263 *
4264 * If the NMI handler sets the pm_stalled field of a PMC after
4265 * the check below, we'll end up processing the stalled PMC at
4266 * the next hardclock tick.
4267 */
4268 for (n = 0; n < md->pmd_npmc; n++) {
4269 pcd = pmc_ri_to_classdep(md, n, &adjri);
4270 KASSERT(pcd != NULL,
4271 ("[pmc,%d] null pcd ri=%d", __LINE__, n));
4272 (void) (*pcd->pcd_get_config)(cpu,adjri,&pm);
4273
4274 if (pm == NULL || /* !cfg'ed */
4275 pm->pm_state != PMC_STATE_RUNNING || /* !active */
4276 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */
4277 pm->pm_stalled == 0) /* !stalled */
4278 continue;
4279
4280 pm->pm_stalled = 0;
4281 (*pcd->pcd_start_pmc)(cpu, adjri);
4282 }
4283}
4284
4285/*
4286 * Event handlers.
4287 */
4288
4289/*
4290 * Handle a process exit.
4291 *
4292 * Remove this process from all hash tables. If this process
4293 * owned any PMCs, turn off those PMCs and deallocate them,
4294 * removing any associations with target processes.
4295 *
4296 * This function will be called by the last 'thread' of a
4297 * process.
4298 *
4299 * XXX This eventhandler gets called early in the exit process.
4300 * Consider using a 'hook' invocation from thread_exit() or equivalent
4301 * spot. Another negative is that kse_exit doesn't seem to call
4302 * exit1() [??].
4303 *
4304 */
4305
4306static void
4307pmc_process_exit(void *arg __unused, struct proc *p)
4308{
4309 struct pmc *pm;
4310 int adjri, cpu;
4311 unsigned int ri;
4312 int is_using_hwpmcs;
4313 struct pmc_owner *po;
4314 struct pmc_process *pp;
4315 struct pmc_classdep *pcd;
4316 pmc_value_t newvalue, tmp;
4317
4318 PROC_LOCK(p);
4319 is_using_hwpmcs = p->p_flag & P_HWPMC;
4320 PROC_UNLOCK(p);
4321
4322 /*
4323 * Log a sysexit event to all SS PMC owners.
4324 */
4325 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
4326 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
4327 pmclog_process_sysexit(po, p->p_pid);
4328
4329 if (!is_using_hwpmcs)
4330 return;
4331
4332 PMC_GET_SX_XLOCK();
4333 PMCDBG(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid,
4334 p->p_comm);
4335
4336 /*
4337 * Since this code is invoked by the last thread in an exiting
4338 * process, we would have context switched IN at some prior
4339 * point. However, with PREEMPTION, kernel mode context
4340 * switches may happen any time, so we want to disable a
4341 	 * context switch OUT till we get any PMCs targeting this
4342 * process off the hardware.
4343 *
4344 * We also need to atomically remove this process'
4345 * entry from our target process hash table, using
4346 * PMC_FLAG_REMOVE.
4347 */
4348 PMCDBG(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid,
4349 p->p_comm);
4350
4351 critical_enter(); /* no preemption */
4352
4353 cpu = curthread->td_oncpu;
4354
4355 if ((pp = pmc_find_process_descriptor(p,
4356 PMC_FLAG_REMOVE)) != NULL) {
4357
4358 PMCDBG(PRC,EXT,2,
4359 "process-exit proc=%p pmc-process=%p", p, pp);
4360
4361 /*
4362 		 * The exiting process could be the target of
4363 		 * some PMCs which will be running on the
4364 		 * currently executing CPU.
4365 *
4366 * We need to turn these PMCs off like we
4367 * would do at context switch OUT time.
4368 */
4369 for (ri = 0; ri < md->pmd_npmc; ri++) {
4370
4371 /*
4372 * Pick up the pmc pointer from hardware
4373 * state similar to the CSW_OUT code.
4374 */
4375 pm = NULL;
4376
4377 pcd = pmc_ri_to_classdep(md, ri, &adjri);
4378
4379 (void) (*pcd->pcd_get_config)(cpu, adjri, &pm);
4380
4381 PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm);
4382
4383 if (pm == NULL ||
4384 !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
4385 continue;
4386
4387 PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
4388 "state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
4389 pm, pm->pm_state);
4390
4391 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
4392 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
4393 __LINE__, PMC_TO_ROWINDEX(pm), ri));
4394
4395 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
4396 ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
4397 __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc));
4398
4399 (void) pcd->pcd_stop_pmc(cpu, adjri);
4400
4401 KASSERT(pm->pm_runcount > 0,
4402 ("[pmc,%d] bad runcount ri %d rc %d",
4403 __LINE__, ri, pm->pm_runcount));
4404
4405 /* Stop hardware only if it is actually running */
4406 if (pm->pm_state == PMC_STATE_RUNNING &&
4407 pm->pm_stalled == 0) {
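				/*
				 * Fold the count accumulated since the last
				 * context switch into the PMC's and the
				 * target's saved values.
				 */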
4408 pcd->pcd_read_pmc(cpu, adjri, &newvalue);
4409 tmp = newvalue -
4410 PMC_PCPU_SAVED(cpu,ri);
4411
4412 mtx_pool_lock_spin(pmc_mtxpool, pm);
4413 pm->pm_gv.pm_savedvalue += tmp;
4414 pp->pp_pmcs[ri].pp_pmcval += tmp;
4415 mtx_pool_unlock_spin(pmc_mtxpool, pm);
4416 }
4417
4418 atomic_subtract_rel_int(&pm->pm_runcount,1);
4419
4420 KASSERT((int) pm->pm_runcount >= 0,
4421 			    ("[pmc,%d] runcount is %d", __LINE__, pm->pm_runcount));
4422
4423 (void) pcd->pcd_config_pmc(cpu, adjri, NULL);
4424 }
4425
4426 /*
4427 * Inform the MD layer of this pseudo "context switch
4428 * out"
4429 */
4430 (void) md->pmd_switch_out(pmc_pcpu[cpu], pp);
4431
4432 critical_exit(); /* ok to be pre-empted now */
4433
4434 /*
4435 * Unlink this process from the PMCs that are
4436 		 * targeting it. This will send a signal to
4437 		 * all PMC owners whose PMCs are orphaned.
4438 *
4439 * Log PMC value at exit time if requested.
4440 */
4441 for (ri = 0; ri < md->pmd_npmc; ri++)
4442 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
4443 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE &&
4444 PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)))
4445 pmclog_process_procexit(pm, pp);
4446 pmc_unlink_target_process(pm, pp);
4447 }
4448 free(pp, M_PMC);
4449
4450 } else
4451 critical_exit(); /* pp == NULL */
4452
4453
4454 /*
4455 	 * If the process owned any PMCs, free them and release the
4456 	 * associated memory.
4457 */
4458 if ((po = pmc_find_owner_descriptor(p)) != NULL) {
4459 pmc_remove_owner(po);
4460 pmc_destroy_owner_descriptor(po);
4461 }
4462
4463 sx_xunlock(&pmc_sx);
4464}
4465
4466/*
4467 * Handle a process fork.
4468 *
4469 * If the parent process 'p1' is under HWPMC monitoring, then copy
4470 * over any attached PMCs that have 'do_descendants' semantics.
4471 */
4472
4473static void
4474pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc,
4475 int flags)
4476{
4477 int is_using_hwpmcs;
4478 unsigned int ri;
4479 uint32_t do_descendants;
4480 struct pmc *pm;
4481 struct pmc_owner *po;
4482 struct pmc_process *ppnew, *ppold;
4483
4484 (void) flags; /* unused parameter */
4485
4486 PROC_LOCK(p1);
4487 is_using_hwpmcs = p1->p_flag & P_HWPMC;
4488 PROC_UNLOCK(p1);
4489
4490 /*
4491 * If there are system-wide sampling PMCs active, we need to
4492 * log all fork events to their owner's logs.
4493 */
4494
4495 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
4496 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
4497 pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
4498
4499 if (!is_using_hwpmcs)
4500 return;
4501
4502 PMC_GET_SX_XLOCK();
4503 PMCDBG(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1,
4504 p1->p_pid, p1->p_comm, newproc);
4505
4506 /*
4507 * If the parent process (curthread->td_proc) is a
4508 * target of any PMCs, look for PMCs that are to be
4509 * inherited, and link these into the new process
4510 * descriptor.
4511 */
4512 if ((ppold = pmc_find_process_descriptor(curthread->td_proc,
4513 PMC_FLAG_NONE)) == NULL)
4514 goto done; /* nothing to do */
4515
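	/*
	 * Check whether any PMC attached to the parent asks to be
	 * inherited by its descendants.
	 */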
4516 do_descendants = 0;
4517 for (ri = 0; ri < md->pmd_npmc; ri++)
4518 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL)
4519 do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS;
4520 if (do_descendants == 0) /* nothing to do */
4521 goto done;
4522
4523 /* allocate a descriptor for the new process */
4524 if ((ppnew = pmc_find_process_descriptor(newproc,
4525 PMC_FLAG_ALLOCATE)) == NULL)
4526 goto done;
4527
4528 /*
4529 * Run through all PMCs that were targeting the old process
4530 * and which specified F_DESCENDANTS and attach them to the
4531 * new process.
4532 *
4533 * Log the fork event to all owners of PMCs attached to this
4534 * process, if not already logged.
4535 */
4536 for (ri = 0; ri < md->pmd_npmc; ri++)
4537 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL &&
4538 (pm->pm_flags & PMC_F_DESCENDANTS)) {
4539 pmc_link_target_process(pm, ppnew);
4540 po = pm->pm_owner;
4541 if (po->po_sscount == 0 &&
4542 po->po_flags & PMC_PO_OWNS_LOGFILE)
4543 pmclog_process_procfork(po, p1->p_pid,
4544 newproc->p_pid);
4545 }
4546
4547 /*
4548 * Now mark the new process as being tracked by this driver.
4549 */
4550 PROC_LOCK(newproc);
4551 newproc->p_flag |= P_HWPMC;
4552 PROC_UNLOCK(newproc);
4553
4554 done:
4555 sx_xunlock(&pmc_sx);
4556}
4557
4558
4559/*
4560 * initialization
4561 */
4562
4563static const char *pmc_name_of_pmcclass[] = {
4564#undef __PMC_CLASS
4565#define __PMC_CLASS(N) #N ,
4566 __PMC_CLASSES()
4567};
4568
4569static int
4570pmc_initialize(void)
4571{
4572 int c, cpu, error, n, ri;
4573 unsigned int maxcpu;
4574 struct pmc_binding pb;
4575 struct pmc_sample *ps;
4576 struct pmc_classdep *pcd;
4577 struct pmc_samplebuffer *sb;
4578
4579 md = NULL;
4580 error = 0;
4581
4582#ifdef DEBUG
4583 /* parse debug flags first */
4584 if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags",
4585 pmc_debugstr, sizeof(pmc_debugstr)))
4586 pmc_debugflags_parse(pmc_debugstr,
4587 pmc_debugstr+strlen(pmc_debugstr));
4588#endif
4589
4590 PMCDBG(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION);
4591
4592 /* check kernel version */
4593 if (pmc_kernel_version != PMC_VERSION) {
4594 if (pmc_kernel_version == 0)
4595 printf("hwpmc: this kernel has not been compiled with "
4596 "'options HWPMC_HOOKS'.\n");
4597 else
4598 printf("hwpmc: kernel version (0x%x) does not match "
4599 "module version (0x%x).\n", pmc_kernel_version,
4600 PMC_VERSION);
4601 return EPROGMISMATCH;
4602 }
4603
4604 /*
4605 * check sysctl parameters
4606 */
4607
4608 if (pmc_hashsize <= 0) {
4609 (void) printf("hwpmc: tunable \"hashsize\"=%d must be "
4610 "greater than zero.\n", pmc_hashsize);
4611 pmc_hashsize = PMC_HASH_SIZE;
4612 }
4613
4614 if (pmc_nsamples <= 0 || pmc_nsamples > 65535) {
4615 (void) printf("hwpmc: tunable \"nsamples\"=%d out of "
4616 "range.\n", pmc_nsamples);
4617 pmc_nsamples = PMC_NSAMPLES;
4618 }
4619
4620 if (pmc_callchaindepth <= 0 ||
4621 pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
4622 (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of "
4623 "range.\n", pmc_callchaindepth);
4624 pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
4625 }
4626
4627 md = pmc_md_initialize();
4628
4629 if (md == NULL)
4630 return (ENOSYS);
4631
4632 KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1,
4633 ("[pmc,%d] no classes or pmcs", __LINE__));
4634
4635 /* Compute the map from row-indices to classdep pointers. */
4636 pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) *
4637 md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO);
4638
4639 for (n = 0; n < md->pmd_npmc; n++)
4640 pmc_rowindex_to_classdep[n] = NULL;
4641 for (ri = c = 0; c < md->pmd_nclass; c++) {
4642 pcd = &md->pmd_classdep[c];
4643 for (n = 0; n < pcd->pcd_num; n++, ri++)
4644 pmc_rowindex_to_classdep[ri] = pcd;
4645 }
4646
4647 KASSERT(ri == md->pmd_npmc,
4648 ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__,
4649 ri, md->pmd_npmc));
4650
4651 maxcpu = pmc_cpu_max();
4652
4653 /* allocate space for the per-cpu array */
4654 pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC,
4655 M_WAITOK|M_ZERO);
4656
4657 /* per-cpu 'saved values' for managing process-mode PMCs */
4658 pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc,
4659 M_PMC, M_WAITOK);
4660
4661 /* Perform CPU-dependent initialization. */
4662 pmc_save_cpu_binding(&pb);
4663 error = 0;
4664 for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) {
4665 if (!pmc_cpu_is_active(cpu))
4666 continue;
4667 pmc_select_cpu(cpu);
4668 pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) +
4669 md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC,
4670 M_WAITOK|M_ZERO);
4671 if (md->pmd_pcpu_init)
4672 error = md->pmd_pcpu_init(md, cpu);
4673 for (n = 0; error == 0 && n < md->pmd_nclass; n++)
4674 error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu);
4675 }
4676 pmc_restore_cpu_binding(&pb);
4677
4678 if (error)
4679 return (error);
4680
4681 /* allocate space for the sample array */
4682 for (cpu = 0; cpu < maxcpu; cpu++) {
4683 if (!pmc_cpu_is_active(cpu))
4684 continue;
4685
4686 sb = malloc(sizeof(struct pmc_samplebuffer) +
4687 pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
4688 M_WAITOK|M_ZERO);
4689 sb->ps_read = sb->ps_write = sb->ps_samples;
4690 sb->ps_fence = sb->ps_samples + pmc_nsamples;
4691
4692 KASSERT(pmc_pcpu[cpu] != NULL,
4693 ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
4694
4695 sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples *
4696 sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO);
4697
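		/*
		 * Carve the flat callchain area into a fixed-size PC
		 * buffer for each sample slot.
		 */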
4698 for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
4699 ps->ps_pc = sb->ps_callchains +
4700 (n * pmc_callchaindepth);
4701
4702 pmc_pcpu[cpu]->pc_sb = sb;
4703 }
4704
4705 /* allocate space for the row disposition array */
4706 pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc,
4707 M_PMC, M_WAITOK|M_ZERO);
4708
4709 KASSERT(pmc_pmcdisp != NULL,
4710 ("[pmc,%d] pmcdisp allocation returned NULL", __LINE__));
4711
4712 /* mark all PMCs as available */
4713 for (n = 0; n < (int) md->pmd_npmc; n++)
4714 PMC_MARK_ROW_FREE(n);
4715
4716 	/* allocate owner and target-process hash tables */
4717 pmc_ownerhash = hashinit(pmc_hashsize, M_PMC,
4718 &pmc_ownerhashmask);
4719
4720 pmc_processhash = hashinit(pmc_hashsize, M_PMC,
4721 &pmc_processhashmask);
4722 mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf",
4723 MTX_SPIN);
4724
4725 LIST_INIT(&pmc_ss_owners);
4726 pmc_ss_count = 0;
4727
4728 /* allocate a pool of spin mutexes */
4729 pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size,
4730 MTX_SPIN);
4731
4732 PMCDBG(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx "
4733 "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask,
4734 pmc_processhash, pmc_processhashmask);
4735
4736 	/* register process {exit,fork} handlers */
4737 pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit,
4738 pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY);
4739 pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork,
4740 pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY);
4741
4742 /* initialize logging */
4743 pmclog_initialize();
4744
4745 /* set hook functions */
4746 pmc_intr = md->pmd_intr;
4747 pmc_hook = pmc_hook_handler;
4748
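	/*
	 * Announce the supported PMC classes; capabilities are printed
	 * using the kernel's "%b" bit-field format.
	 */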
4749 if (error == 0) {
4750 printf(PMC_MODULE_NAME ":");
4751 for (n = 0; n < (int) md->pmd_nclass; n++) {
4752 pcd = &md->pmd_classdep[n];
4753 printf(" %s/%d/%d/0x%b",
4754 pmc_name_of_pmcclass[pcd->pcd_class],
4755 pcd->pcd_num,
4756 pcd->pcd_width,
4757 pcd->pcd_caps,
4758 "\20"
4759 "\1INT\2USR\3SYS\4EDG\5THR"
4760 "\6REA\7WRI\10INV\11QUA\12PRC"
4761 "\13TAG\14CSC");
4762 }
4763 printf("\n");
4764 }
4765
4766 return (error);
4767}
4768
4769/* prepare to be unloaded */
4770static void
4771pmc_cleanup(void)
4772{
4773 int c, cpu;
4774 unsigned int maxcpu;
4775 struct pmc_ownerhash *ph;
4776 struct pmc_owner *po, *tmp;
4777 struct pmc_binding pb;
4778#ifdef DEBUG
4779 struct pmc_processhash *prh;
4780#endif
4781
4782 PMCDBG(MOD,INI,0, "%s", "cleanup");
4783
4784 /* switch off sampling */
4785 CPU_ZERO(&pmc_cpumask);
4786 pmc_intr = NULL;
4787
4788 sx_xlock(&pmc_sx);
4789 if (pmc_hook == NULL) { /* being unloaded already */
4790 sx_xunlock(&pmc_sx);
4791 return;
4792 }
4793
4794 pmc_hook = NULL; /* prevent new threads from entering module */
4795
4796 /* deregister event handlers */
4797 EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag);
4798 EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag);
4799
4800 /* send SIGBUS to all owner threads, free up allocations */
4801 if (pmc_ownerhash)
4802 for (ph = pmc_ownerhash;
4803 ph <= &pmc_ownerhash[pmc_ownerhashmask];
4804 ph++) {
4805 LIST_FOREACH_SAFE(po, ph, po_next, tmp) {
4806 pmc_remove_owner(po);
4807
4808 /* send SIGBUS to owner processes */
4809 PMCDBG(MOD,INI,2, "cleanup signal proc=%p "
4810 "(%d, %s)", po->po_owner,
4811 po->po_owner->p_pid,
4812 po->po_owner->p_comm);
4813
4814 PROC_LOCK(po->po_owner);
4815 kern_psignal(po->po_owner, SIGBUS);
4816 PROC_UNLOCK(po->po_owner);
4817
4818 pmc_destroy_owner_descriptor(po);
4819 }
4820 }
4821
4822 /* reclaim allocated data structures */
4823 if (pmc_mtxpool)
4824 mtx_pool_destroy(&pmc_mtxpool);
4825
4826 mtx_destroy(&pmc_processhash_mtx);
4827 if (pmc_processhash) {
4828#ifdef DEBUG
4829 struct pmc_process *pp;
4830
4831 PMCDBG(MOD,INI,3, "%s", "destroy process hash");
4832 for (prh = pmc_processhash;
4833 prh <= &pmc_processhash[pmc_processhashmask];
4834 prh++)
4835 LIST_FOREACH(pp, prh, pp_next)
4836 PMCDBG(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid);
4837#endif
4838
4839 hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask);
4840 pmc_processhash = NULL;
4841 }
4842
4843 if (pmc_ownerhash) {
4844 PMCDBG(MOD,INI,3, "%s", "destroy owner hash");
4845 hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask);
4846 pmc_ownerhash = NULL;
4847 }
4848
4849 KASSERT(LIST_EMPTY(&pmc_ss_owners),
4850 ("[pmc,%d] Global SS owner list not empty", __LINE__));
4851 KASSERT(pmc_ss_count == 0,
4852 ("[pmc,%d] Global SS count not empty", __LINE__));
4853
4854 /* do processor and pmc-class dependent cleanup */
4855 maxcpu = pmc_cpu_max();
4856
4857 PMCDBG(MOD,INI,3, "%s", "md cleanup");
4858 if (md) {
4859 pmc_save_cpu_binding(&pb);
4860 for (cpu = 0; cpu < maxcpu; cpu++) {
4861 PMCDBG(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p",
4862 cpu, pmc_pcpu[cpu]);
4863 if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL)
4864 continue;
4865 pmc_select_cpu(cpu);
4866 for (c = 0; c < md->pmd_nclass; c++)
4867 md->pmd_classdep[c].pcd_pcpu_fini(md, cpu);
4868 if (md->pmd_pcpu_fini)
4869 md->pmd_pcpu_fini(md, cpu);
4870 }
4871
4872 pmc_md_finalize(md);
4873
4874 free(md, M_PMC);
4875 md = NULL;
4876 pmc_restore_cpu_binding(&pb);
4877 }
4878
4879 /* Free per-cpu descriptors. */
4880 for (cpu = 0; cpu < maxcpu; cpu++) {
4881 if (!pmc_cpu_is_active(cpu))
4882 continue;
4883 KASSERT(pmc_pcpu[cpu]->pc_sb != NULL,
4884 ("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__,
4885 cpu));
4886 free(pmc_pcpu[cpu]->pc_sb->ps_callchains, M_PMC);
4887 free(pmc_pcpu[cpu]->pc_sb, M_PMC);
4888 free(pmc_pcpu[cpu], M_PMC);
4889 }
4890
4891 free(pmc_pcpu, M_PMC);
4892 pmc_pcpu = NULL;
4893
4894 free(pmc_pcpu_saved, M_PMC);
4895 pmc_pcpu_saved = NULL;
4896
4897 if (pmc_pmcdisp) {
4898 free(pmc_pmcdisp, M_PMC);
4899 pmc_pmcdisp = NULL;
4900 }
4901
4902 if (pmc_rowindex_to_classdep) {
4903 free(pmc_rowindex_to_classdep, M_PMC);
4904 pmc_rowindex_to_classdep = NULL;
4905 }
4906
4907 pmclog_shutdown();
4908
4909 sx_xunlock(&pmc_sx); /* we are done */
4910}
4911
4912/*
4913 * The function called at load/unload.
4914 */
4915
4916static int
4917load (struct module *module __unused, int cmd, void *arg __unused)
4918{
4919 int error;
4920
4921 error = 0;
4922
4923 switch (cmd) {
4924 case MOD_LOAD :
4925 /* initialize the subsystem */
4926 error = pmc_initialize();
4927 if (error != 0)
4928 break;
4929 PMCDBG(MOD,INI,1, "syscall=%d maxcpu=%d",
4930 pmc_syscall_num, pmc_cpu_max());
4931 break;
4932
4933
4934 case MOD_UNLOAD :
4935 case MOD_SHUTDOWN:
4936 pmc_cleanup();
4937 PMCDBG(MOD,INI,1, "%s", "unloaded");
4938 break;
4939
4940 default :
4941 error = EINVAL; /* XXX should panic(9) */
4942 break;
4943 }
4944
4945 return error;
4946}
4947
4948/* memory pool */
4949MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");
2949 * Retrieve hardware configuration.
2950 */
2951
2952 case PMC_OP_GETCPUINFO: /* CPU information */
2953 {
2954 struct pmc_op_getcpuinfo gci;
2955 struct pmc_classinfo *pci;
2956 struct pmc_classdep *pcd;
2957 int cl;
2958
2959 gci.pm_cputype = md->pmd_cputype;
2960 gci.pm_ncpu = pmc_cpu_max();
2961 gci.pm_npmc = md->pmd_npmc;
2962 gci.pm_nclass = md->pmd_nclass;
2963 pci = gci.pm_classes;
2964 pcd = md->pmd_classdep;
2965 for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) {
2966 pci->pm_caps = pcd->pcd_caps;
2967 pci->pm_class = pcd->pcd_class;
2968 pci->pm_width = pcd->pcd_width;
2969 pci->pm_num = pcd->pcd_num;
2970 }
2971 error = copyout(&gci, arg, sizeof(gci));
2972 }
2973 break;
2974
2975
2976 /*
2977 * Get module statistics
2978 */
2979
2980 case PMC_OP_GETDRIVERSTATS:
2981 {
2982 struct pmc_op_getdriverstats gms;
2983
2984 bcopy(&pmc_stats, &gms, sizeof(gms));
2985 error = copyout(&gms, arg, sizeof(gms));
2986 }
2987 break;
2988
2989
2990 /*
2991 * Retrieve module version number
2992 */
2993
2994 case PMC_OP_GETMODULEVERSION:
2995 {
2996 uint32_t cv, modv;
2997
2998 /* retrieve the client's idea of the ABI version */
2999 if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0)
3000 break;
3001 /* don't service clients newer than our driver */
3002 modv = PMC_VERSION;
3003 if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) {
3004 error = EPROGMISMATCH;
3005 break;
3006 }
3007 error = copyout(&modv, arg, sizeof(int));
3008 }
3009 break;
3010
3011
3012 /*
3013 * Retrieve the state of all the PMCs on a given
3014 * CPU.
3015 */
3016
3017 case PMC_OP_GETPMCINFO:
3018 {
3019 int ari;
3020 struct pmc *pm;
3021 size_t pmcinfo_size;
3022 uint32_t cpu, n, npmc;
3023 struct pmc_owner *po;
3024 struct pmc_binding pb;
3025 struct pmc_classdep *pcd;
3026 struct pmc_info *p, *pmcinfo;
3027 struct pmc_op_getpmcinfo *gpi;
3028
3029 PMC_DOWNGRADE_SX();
3030
3031 gpi = (struct pmc_op_getpmcinfo *) arg;
3032
3033 if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0)
3034 break;
3035
3036 if (cpu >= pmc_cpu_max()) {
3037 error = EINVAL;
3038 break;
3039 }
3040
3041 if (!pmc_cpu_is_active(cpu)) {
3042 error = ENXIO;
3043 break;
3044 }
3045
3046 /* switch to CPU 'cpu' */
3047 pmc_save_cpu_binding(&pb);
3048 pmc_select_cpu(cpu);
3049
3050 npmc = md->pmd_npmc;
3051
3052 pmcinfo_size = npmc * sizeof(struct pmc_info);
3053 pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK);
3054
3055 p = pmcinfo;
3056
3057 for (n = 0; n < md->pmd_npmc; n++, p++) {
3058
3059 pcd = pmc_ri_to_classdep(md, n, &ari);
3060
3061 KASSERT(pcd != NULL,
3062 ("[pmc,%d] null pcd ri=%d", __LINE__, n));
3063
3064 if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0)
3065 break;
3066
3067 if (PMC_ROW_DISP_IS_STANDALONE(n))
3068 p->pm_rowdisp = PMC_DISP_STANDALONE;
3069 else if (PMC_ROW_DISP_IS_THREAD(n))
3070 p->pm_rowdisp = PMC_DISP_THREAD;
3071 else
3072 p->pm_rowdisp = PMC_DISP_FREE;
3073
3074 p->pm_ownerpid = -1;
3075
3076 if (pm == NULL) /* no PMC associated */
3077 continue;
3078
3079 po = pm->pm_owner;
3080
3081 KASSERT(po->po_owner != NULL,
3082 ("[pmc,%d] pmc_owner had a null proc pointer",
3083 __LINE__));
3084
3085 p->pm_ownerpid = po->po_owner->p_pid;
3086 p->pm_mode = PMC_TO_MODE(pm);
3087 p->pm_event = pm->pm_event;
3088 p->pm_flags = pm->pm_flags;
3089
3090 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
3091 p->pm_reloadcount =
3092 pm->pm_sc.pm_reloadcount;
3093 }
3094
3095 pmc_restore_cpu_binding(&pb);
3096
3097 /* now copy out the PMC info collected */
3098 if (error == 0)
3099 error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size);
3100
3101 free(pmcinfo, M_PMC);
3102 }
3103 break;
3104
3105
3106 /*
3107 * Set the administrative state of a PMC. I.e. whether
3108 * the PMC is to be used or not.
3109 */
3110
3111 case PMC_OP_PMCADMIN:
3112 {
3113 int cpu, ri;
3114 enum pmc_state request;
3115 struct pmc_cpu *pc;
3116 struct pmc_hw *phw;
3117 struct pmc_op_pmcadmin pma;
3118 struct pmc_binding pb;
3119
3120 sx_assert(&pmc_sx, SX_XLOCKED);
3121
3122 KASSERT(td == curthread,
3123 ("[pmc,%d] td != curthread", __LINE__));
3124
3125 error = priv_check(td, PRIV_PMC_MANAGE);
3126 if (error)
3127 break;
3128
3129 if ((error = copyin(arg, &pma, sizeof(pma))) != 0)
3130 break;
3131
3132 cpu = pma.pm_cpu;
3133
3134 if (cpu < 0 || cpu >= (int) pmc_cpu_max()) {
3135 error = EINVAL;
3136 break;
3137 }
3138
3139 if (!pmc_cpu_is_active(cpu)) {
3140 error = ENXIO;
3141 break;
3142 }
3143
3144 request = pma.pm_state;
3145
3146 if (request != PMC_STATE_DISABLED &&
3147 request != PMC_STATE_FREE) {
3148 error = EINVAL;
3149 break;
3150 }
3151
3152 ri = pma.pm_pmc; /* pmc id == row index */
3153 if (ri < 0 || ri >= (int) md->pmd_npmc) {
3154 error = EINVAL;
3155 break;
3156 }
3157
3158 /*
3159 * We can't disable a PMC with a row-index allocated
3160 * for process virtual PMCs.
3161 */
3162
3163 if (PMC_ROW_DISP_IS_THREAD(ri) &&
3164 request == PMC_STATE_DISABLED) {
3165 error = EBUSY;
3166 break;
3167 }
3168
3169 /*
3170 * otherwise, this PMC on this CPU is either free or
3171 * in system-wide mode.
3172 */
3173
3174 pmc_save_cpu_binding(&pb);
3175 pmc_select_cpu(cpu);
3176
3177 pc = pmc_pcpu[cpu];
3178 phw = pc->pc_hwpmcs[ri];
3179
3180 /*
3181 * XXX do we need some kind of 'forced' disable?
3182 */
3183
3184 if (phw->phw_pmc == NULL) {
3185 if (request == PMC_STATE_DISABLED &&
3186 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) {
3187 phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED;
3188 PMC_MARK_ROW_STANDALONE(ri);
3189 } else if (request == PMC_STATE_FREE &&
3190 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) {
3191 phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED;
3192 PMC_UNMARK_ROW_STANDALONE(ri);
3193 }
3194 /* other cases are a no-op */
3195 } else
3196 error = EBUSY;
3197
3198 pmc_restore_cpu_binding(&pb);
3199 }
3200 break;
3201
3202
3203 /*
3204 * Allocate a PMC.
3205 */
3206
3207 case PMC_OP_PMCALLOCATE:
3208 {
3209 int adjri, n;
3210 u_int cpu;
3211 uint32_t caps;
3212 struct pmc *pmc;
3213 enum pmc_mode mode;
3214 struct pmc_hw *phw;
3215 struct pmc_binding pb;
3216 struct pmc_classdep *pcd;
3217 struct pmc_op_pmcallocate pa;
3218
3219 if ((error = copyin(arg, &pa, sizeof(pa))) != 0)
3220 break;
3221
3222 caps = pa.pm_caps;
3223 mode = pa.pm_mode;
3224 cpu = pa.pm_cpu;
3225
3226 if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC &&
3227 mode != PMC_MODE_TS && mode != PMC_MODE_TC) ||
3228 (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) {
3229 error = EINVAL;
3230 break;
3231 }
3232
3233 /*
3234 * Virtual PMCs should only ask for a default CPU.
3235 * System mode PMCs need to specify a non-default CPU.
3236 */
3237
3238 if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) ||
3239 (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) {
3240 error = EINVAL;
3241 break;
3242 }
3243
3244 /*
3245 * Check that an inactive CPU is not being asked for.
3246 */
3247
3248 if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) {
3249 error = ENXIO;
3250 break;
3251 }
3252
3253 /*
3254 * Refuse an allocation for a system-wide PMC if this
3255 * process has been jailed, or if this process lacks
3256 * super-user credentials and the sysctl tunable
3257 * 'security.bsd.unprivileged_syspmcs' is zero.
3258 */
3259
3260 if (PMC_IS_SYSTEM_MODE(mode)) {
3261 if (jailed(curthread->td_ucred)) {
3262 error = EPERM;
3263 break;
3264 }
3265 if (!pmc_unprivileged_syspmcs) {
3266 error = priv_check(curthread,
3267 PRIV_PMC_SYSTEM);
3268 if (error)
3269 break;
3270 }
3271 }
3272
3273 /*
3274 * Look for valid values for 'pm_flags'
3275 */
3276
3277 if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
3278 PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) {
3279 error = EINVAL;
3280 break;
3281 }
3282
3283 /* process logging options are not allowed for system PMCs */
3284 if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags &
3285 (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) {
3286 error = EINVAL;
3287 break;
3288 }
3289
3290 /*
3291 * All sampling mode PMCs need to be able to interrupt the
3292 * CPU.
3293 */
3294 if (PMC_IS_SAMPLING_MODE(mode))
3295 caps |= PMC_CAP_INTERRUPT;
3296
3297 /* A valid class specifier should have been passed in. */
3298 for (n = 0; n < md->pmd_nclass; n++)
3299 if (md->pmd_classdep[n].pcd_class == pa.pm_class)
3300 break;
3301 if (n == md->pmd_nclass) {
3302 error = EINVAL;
3303 break;
3304 }
3305
3306 /* The requested PMC capabilities should be feasible. */
3307 if ((md->pmd_classdep[n].pcd_caps & caps) != caps) {
3308 error = EOPNOTSUPP;
3309 break;
3310 }
3311
3312 PMCDBG(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d",
3313 pa.pm_ev, caps, mode, cpu);
3314
3315 pmc = pmc_allocate_pmc_descriptor();
3316 pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class,
3317 PMC_ID_INVALID);
3318 pmc->pm_event = pa.pm_ev;
3319 pmc->pm_state = PMC_STATE_FREE;
3320 pmc->pm_caps = caps;
3321 pmc->pm_flags = pa.pm_flags;
3322
3323 /* switch thread to CPU 'cpu' */
3324 pmc_save_cpu_binding(&pb);
3325
3326#define PMC_IS_SHAREABLE_PMC(cpu, n) \
3327 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \
3328 PMC_PHW_FLAG_IS_SHAREABLE)
3329#define PMC_IS_UNALLOCATED(cpu, n) \
3330 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL)
3331
3332 if (PMC_IS_SYSTEM_MODE(mode)) {
3333 pmc_select_cpu(cpu);
3334 for (n = 0; n < (int) md->pmd_npmc; n++) {
3335 pcd = pmc_ri_to_classdep(md, n, &adjri);
3336 if (pmc_can_allocate_row(n, mode) == 0 &&
3337 pmc_can_allocate_rowindex(
3338 curthread->td_proc, n, cpu) == 0 &&
3339 (PMC_IS_UNALLOCATED(cpu, n) ||
3340 PMC_IS_SHAREABLE_PMC(cpu, n)) &&
3341 pcd->pcd_allocate_pmc(cpu, adjri, pmc,
3342 &pa) == 0)
3343 break;
3344 }
3345 } else {
3346 /* Process virtual mode */
3347 for (n = 0; n < (int) md->pmd_npmc; n++) {
3348 pcd = pmc_ri_to_classdep(md, n, &adjri);
3349 if (pmc_can_allocate_row(n, mode) == 0 &&
3350 pmc_can_allocate_rowindex(
3351 curthread->td_proc, n,
3352 PMC_CPU_ANY) == 0 &&
3353 pcd->pcd_allocate_pmc(curthread->td_oncpu,
3354 adjri, pmc, &pa) == 0)
3355 break;
3356 }
3357 }
3358
3359#undef PMC_IS_UNALLOCATED
3360#undef PMC_IS_SHAREABLE_PMC
3361
3362 pmc_restore_cpu_binding(&pb);
3363
3364 if (n == (int) md->pmd_npmc) {
3365 pmc_destroy_pmc_descriptor(pmc);
3366 free(pmc, M_PMC);
3367 pmc = NULL;
3368 error = EINVAL;
3369 break;
3370 }
3371
3372 /* Fill in the correct value in the ID field */
3373 pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n);
3374
3375 PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x",
3376 pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id);
3377
3378 /* Process mode PMCs with logging enabled need log files */
3379 if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW))
3380 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE;
3381
3382 /* All system mode sampling PMCs require a log file */
3383 if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode))
3384 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE;
3385
3386 /*
3387 * Configure global pmc's immediately
3388 */
3389
3390 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) {
3391
3392 pmc_save_cpu_binding(&pb);
3393 pmc_select_cpu(cpu);
3394
3395 phw = pmc_pcpu[cpu]->pc_hwpmcs[n];
3396 pcd = pmc_ri_to_classdep(md, n, &adjri);
3397
3398 if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 ||
3399 (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) {
3400 (void) pcd->pcd_release_pmc(cpu, adjri, pmc);
3401 pmc_destroy_pmc_descriptor(pmc);
3402 free(pmc, M_PMC);
3403 pmc = NULL;
3404 pmc_restore_cpu_binding(&pb);
3405 error = EPERM;
3406 break;
3407 }
3408
3409 pmc_restore_cpu_binding(&pb);
3410 }
3411
3412 pmc->pm_state = PMC_STATE_ALLOCATED;
3413
3414 /*
3415 * mark row disposition
3416 */
3417
3418 if (PMC_IS_SYSTEM_MODE(mode))
3419 PMC_MARK_ROW_STANDALONE(n);
3420 else
3421 PMC_MARK_ROW_THREAD(n);
3422
3423 /*
3424 * Register this PMC with the current thread as its owner.
3425 */
3426
3427 if ((error =
3428 pmc_register_owner(curthread->td_proc, pmc)) != 0) {
3429 pmc_release_pmc_descriptor(pmc);
3430 free(pmc, M_PMC);
3431 pmc = NULL;
3432 break;
3433 }
3434
3435 /*
3436 * Return the allocated index.
3437 */
3438
3439 pa.pm_pmcid = pmc->pm_id;
3440
3441 error = copyout(&pa, arg, sizeof(pa));
3442 }
3443 break;
3444
3445
3446 /*
3447 * Attach a PMC to a process.
3448 */
3449
3450 case PMC_OP_PMCATTACH:
3451 {
3452 struct pmc *pm;
3453 struct proc *p;
3454 struct pmc_op_pmcattach a;
3455
3456 sx_assert(&pmc_sx, SX_XLOCKED);
3457
3458 if ((error = copyin(arg, &a, sizeof(a))) != 0)
3459 break;
3460
3461 if (a.pm_pid < 0) {
3462 error = EINVAL;
3463 break;
3464 } else if (a.pm_pid == 0)
3465 a.pm_pid = td->td_proc->p_pid;
3466
3467 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0)
3468 break;
3469
3470 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
3471 error = EINVAL;
3472 break;
3473 }
3474
3475 /* PMCs may be (re)attached only when allocated or stopped */
3476 if (pm->pm_state == PMC_STATE_RUNNING) {
3477 error = EBUSY;
3478 break;
3479 } else if (pm->pm_state != PMC_STATE_ALLOCATED &&
3480 pm->pm_state != PMC_STATE_STOPPED) {
3481 error = EINVAL;
3482 break;
3483 }
3484
3485 /* lookup pid */
3486 if ((p = pfind(a.pm_pid)) == NULL) {
3487 error = ESRCH;
3488 break;
3489 }
3490
3491 /*
3492 * Ignore processes that are working on exiting.
3493 */
3494 if (p->p_flag & P_WEXIT) {
3495 error = ESRCH;
3496 PROC_UNLOCK(p); /* pfind() returns a locked process */
3497 break;
3498 }
3499
3500 /*
3501 * we are allowed to attach a PMC to a process if
3502 * we can debug it.
3503 */
3504 error = p_candebug(curthread, p);
3505
3506 PROC_UNLOCK(p);
3507
3508 if (error == 0)
3509 error = pmc_attach_process(p, pm);
3510 }
3511 break;
3512
3513
3514 /*
3515 * Detach an attached PMC from a process.
3516 */
3517
3518 case PMC_OP_PMCDETACH:
3519 {
3520 struct pmc *pm;
3521 struct proc *p;
3522 struct pmc_op_pmcattach a;
3523
3524 if ((error = copyin(arg, &a, sizeof(a))) != 0)
3525 break;
3526
3527 if (a.pm_pid < 0) {
3528 error = EINVAL;
3529 break;
3530 } else if (a.pm_pid == 0)
3531 a.pm_pid = td->td_proc->p_pid;
3532
3533 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0)
3534 break;
3535
3536 if ((p = pfind(a.pm_pid)) == NULL) {
3537 error = ESRCH;
3538 break;
3539 }
3540
3541 /*
3542 * Treat processes that are in the process of exiting
3543 * as if they were not present.
3544 */
3545
3546 if (p->p_flag & P_WEXIT)
3547 error = ESRCH;
3548
3549 PROC_UNLOCK(p); /* pfind() returns a locked process */
3550
3551 if (error == 0)
3552 error = pmc_detach_process(p, pm);
3553 }
3554 break;
3555
3556
3557 /*
3558 * Retrieve the MSR number associated with the counter
3559 * 'pmc_id'. This allows processes to directly use RDPMC
3560 * instructions to read their PMCs, without the overhead of a
3561 * system call.
3562 */
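	/*
	 * Illustrative sketch (not part of this driver): after a
	 * successful GETMSR a self-attached process could read the
	 * counter directly, assuming -- as the comment above implies --
	 * that the value returned in 'pm_msr' is usable as the RDPMC
	 * counter selector on x86.  The inline asm fragment is only an
	 * example.
	 *
	 *	struct pmc_op_getmsr gm;
	 *	uint32_t hi, lo;
	 *	uint64_t value;
	 *
	 *	(issue PMC_OP_PMCGETMSR with gm.pm_pmcid set, then:)
	 *	__asm __volatile("rdpmc" : "=a" (lo), "=d" (hi)
	 *	    : "c" ((uint32_t) gm.pm_msr));
	 *	value = ((uint64_t) hi << 32) | lo;
	 */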
3563
3564 case PMC_OP_PMCGETMSR:
3565 {
3566 int adjri, ri;
3567 struct pmc *pm;
3568 struct pmc_target *pt;
3569 struct pmc_op_getmsr gm;
3570 struct pmc_classdep *pcd;
3571
3572 PMC_DOWNGRADE_SX();
3573
3574 if ((error = copyin(arg, &gm, sizeof(gm))) != 0)
3575 break;
3576
3577 if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0)
3578 break;
3579
3580 /*
3581 * The allocated PMC has to be a process virtual PMC,
3582 * i.e., of type MODE_T[CS]. Global PMCs can only be
3583 * read using the PMCREAD operation since they may be
3584 * allocated on a different CPU than the one we could
3585 * be running on at the time of the RDPMC instruction.
3586 *
3587 * The GETMSR operation is not allowed for PMCs that
3588 * are inherited across processes.
3589 */
3590
3591 if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) ||
3592 (pm->pm_flags & PMC_F_DESCENDANTS)) {
3593 error = EINVAL;
3594 break;
3595 }
3596
3597 /*
3598	 * It only makes sense to use an RDPMC (or its
3599	 * equivalent instruction on non-x86 architectures) on
3600	 * a process that has allocated and attached a PMC to
3601	 * itself. Conversely, the PMC is only allowed to have
3602 * one process attached to it -- its owner.
3603 */
3604
3605 if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL ||
3606 LIST_NEXT(pt, pt_next) != NULL ||
3607 pt->pt_process->pp_proc != pm->pm_owner->po_owner) {
3608 error = EINVAL;
3609 break;
3610 }
3611
3612 ri = PMC_TO_ROWINDEX(pm);
3613 pcd = pmc_ri_to_classdep(md, ri, &adjri);
3614
3615 /* PMC class has no 'GETMSR' support */
3616 if (pcd->pcd_get_msr == NULL) {
3617 error = ENOSYS;
3618 break;
3619 }
3620
3621 if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0)
3622 break;
3623
3624 if ((error = copyout(&gm, arg, sizeof(gm))) < 0)
3625 break;
3626
3627 /*
3628 * Mark our process as using MSRs. Update machine
3629 * state using a forced context switch.
3630 */
3631
3632 pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS;
3633 pmc_force_context_switch();
3634
3635 }
3636 break;
3637
3638 /*
3639 * Release an allocated PMC
3640 */
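	/*
	 * Illustrative sketch (not part of this driver): RELEASE is the
	 * last step of a PMC's life cycle, e.g. PMCALLOCATE ->
	 * [PMCATTACH] -> PMCSTART -> PMCSTOP -> PMCRELEASE.  The request
	 * carries only the PMC id; the 'pmc_syscall' wrapper name is
	 * hypothetical.
	 *
	 *	struct pmc_op_simple req;
	 *
	 *	req.pm_pmcid = pmcid;
	 *	error = pmc_syscall(PMC_OP_PMCRELEASE, &req);
	 */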
3641
3642 case PMC_OP_PMCRELEASE:
3643 {
3644 pmc_id_t pmcid;
3645 struct pmc *pm;
3646 struct pmc_owner *po;
3647 struct pmc_op_simple sp;
3648
3649 /*
3650 * Find PMC pointer for the named PMC.
3651 *
3652 * Use pmc_release_pmc_descriptor() to switch off the
3653 * PMC, remove all its target threads, and remove the
3654 * PMC from its owner's list.
3655 *
3656 * Remove the owner record if this is the last PMC
3657 * owned.
3658 *
3659 * Free up space.
3660 */
3661
3662 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
3663 break;
3664
3665 pmcid = sp.pm_pmcid;
3666
3667 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
3668 break;
3669
3670 po = pm->pm_owner;
3671 pmc_release_pmc_descriptor(pm);
3672 pmc_maybe_remove_owner(po);
3673
3674 free(pm, M_PMC);
3675 }
3676 break;
3677
3678
3679 /*
3680 * Read and/or write a PMC.
3681 */
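	/*
	 * Illustrative sketch (not part of this driver): a read sets
	 * PMC_F_OLDVALUE, a write sets PMC_F_NEWVALUE, and both may be
	 * combined to install a new value while retrieving the old one.
	 * The 'pmc_syscall' wrapper name is hypothetical.
	 *
	 *	struct pmc_op_pmcrw req;
	 *
	 *	req.pm_pmcid = pmcid;
	 *	req.pm_flags = PMC_F_OLDVALUE;	(read the current value)
	 *	req.pm_value = 0;
	 *	if (pmc_syscall(PMC_OP_PMCRW, &req) == 0)
	 *		printf("count %ju\n", (uintmax_t) req.pm_value);
	 */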
3682
3683 case PMC_OP_PMCRW:
3684 {
3685 int adjri;
3686 struct pmc *pm;
3687 uint32_t cpu, ri;
3688 pmc_value_t oldvalue;
3689 struct pmc_binding pb;
3690 struct pmc_op_pmcrw prw;
3691 struct pmc_classdep *pcd;
3692 struct pmc_op_pmcrw *pprw;
3693
3694 PMC_DOWNGRADE_SX();
3695
3696 if ((error = copyin(arg, &prw, sizeof(prw))) != 0)
3697 break;
3698
3699 ri = 0;
3700 PMCDBG(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid,
3701 prw.pm_flags);
3702
3703 /* must have at least one flag set */
3704 if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) {
3705 error = EINVAL;
3706 break;
3707 }
3708
3709 /* locate pmc descriptor */
3710 if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0)
3711 break;
3712
3713 /* Can't read a PMC that hasn't been started. */
3714 if (pm->pm_state != PMC_STATE_ALLOCATED &&
3715 pm->pm_state != PMC_STATE_STOPPED &&
3716 pm->pm_state != PMC_STATE_RUNNING) {
3717 error = EINVAL;
3718 break;
3719 }
3720
3721 /* writing a new value is allowed only for 'STOPPED' pmcs */
3722 if (pm->pm_state == PMC_STATE_RUNNING &&
3723 (prw.pm_flags & PMC_F_NEWVALUE)) {
3724 error = EBUSY;
3725 break;
3726 }
3727
3728 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
3729
3730 /*
3731 * If this PMC is attached to its owner (i.e.,
3732 * the process requesting this operation) and
3733 * is running, then attempt to get an
3734	 * up-to-date reading from hardware for a READ.
3735 * Writes are only allowed when the PMC is
3736 * stopped, so only update the saved value
3737 * field.
3738 *
3739 * If the PMC is not running, or is not
3740 * attached to its owner, read/write to the
3741 * savedvalue field.
3742 */
3743
3744 ri = PMC_TO_ROWINDEX(pm);
3745 pcd = pmc_ri_to_classdep(md, ri, &adjri);
3746
3747 mtx_pool_lock_spin(pmc_mtxpool, pm);
3748 cpu = curthread->td_oncpu;
3749
3750 if (prw.pm_flags & PMC_F_OLDVALUE) {
3751 if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) &&
3752 (pm->pm_state == PMC_STATE_RUNNING))
3753 error = (*pcd->pcd_read_pmc)(cpu, adjri,
3754 &oldvalue);
3755 else
3756 oldvalue = pm->pm_gv.pm_savedvalue;
3757 }
3758 if (prw.pm_flags & PMC_F_NEWVALUE)
3759 pm->pm_gv.pm_savedvalue = prw.pm_value;
3760
3761 mtx_pool_unlock_spin(pmc_mtxpool, pm);
3762
3763 } else { /* System mode PMCs */
3764 cpu = PMC_TO_CPU(pm);
3765 ri = PMC_TO_ROWINDEX(pm);
3766 pcd = pmc_ri_to_classdep(md, ri, &adjri);
3767
3768 if (!pmc_cpu_is_active(cpu)) {
3769 error = ENXIO;
3770 break;
3771 }
3772
3773 /* move this thread to CPU 'cpu' */
3774 pmc_save_cpu_binding(&pb);
3775 pmc_select_cpu(cpu);
3776
3777 critical_enter();
3778 /* save old value */
3779 if (prw.pm_flags & PMC_F_OLDVALUE)
3780 if ((error = (*pcd->pcd_read_pmc)(cpu, adjri,
3781 &oldvalue)))
3782 goto error;
3783 /* write out new value */
3784 if (prw.pm_flags & PMC_F_NEWVALUE)
3785 error = (*pcd->pcd_write_pmc)(cpu, adjri,
3786 prw.pm_value);
3787 error:
3788 critical_exit();
3789 pmc_restore_cpu_binding(&pb);
3790 if (error)
3791 break;
3792 }
3793
3794 pprw = (struct pmc_op_pmcrw *) arg;
3795
3796#ifdef DEBUG
3797 if (prw.pm_flags & PMC_F_NEWVALUE)
3798 PMCDBG(PMC,OPS,2, "rw id=%d new %jx -> old %jx",
3799 ri, prw.pm_value, oldvalue);
3800 else if (prw.pm_flags & PMC_F_OLDVALUE)
3801 PMCDBG(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue);
3802#endif
3803
3804 /* return old value if requested */
3805 if (prw.pm_flags & PMC_F_OLDVALUE)
3806 if ((error = copyout(&oldvalue, &pprw->pm_value,
3807 sizeof(prw.pm_value))))
3808 break;
3809
3810 }
3811 break;
3812
3813
3814 /*
3815	 * Set the sampling rate (reload count) for a sampling mode PMC
3816	 * or the initial count for a counting mode PMC.
3817 */
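	/*
	 * Illustrative sketch (not part of this driver): the same
	 * operation carries either a sampling reload count or a
	 * counting-mode initial value, depending on the PMC's mode.
	 * The 'pmc_syscall' wrapper name is hypothetical.
	 *
	 *	struct pmc_op_pmcsetcount sc;
	 *
	 *	sc.pm_pmcid = pmcid;
	 *	sc.pm_count = 100000;	(sampling: interrupt every 100000
	 *				 events; counting: start at 100000)
	 *	error = pmc_syscall(PMC_OP_PMCSETCOUNT, &sc);
	 */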
3818
3819 case PMC_OP_PMCSETCOUNT:
3820 {
3821 struct pmc *pm;
3822 struct pmc_op_pmcsetcount sc;
3823
3824 PMC_DOWNGRADE_SX();
3825
3826 if ((error = copyin(arg, &sc, sizeof(sc))) != 0)
3827 break;
3828
3829 if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0)
3830 break;
3831
3832 if (pm->pm_state == PMC_STATE_RUNNING) {
3833 error = EBUSY;
3834 break;
3835 }
3836
3837 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
3838 pm->pm_sc.pm_reloadcount = sc.pm_count;
3839 else
3840 pm->pm_sc.pm_initial = sc.pm_count;
3841 }
3842 break;
3843
3844
3845 /*
3846 * Start a PMC.
3847 */
3848
3849 case PMC_OP_PMCSTART:
3850 {
3851 pmc_id_t pmcid;
3852 struct pmc *pm;
3853 struct pmc_op_simple sp;
3854
3855 sx_assert(&pmc_sx, SX_XLOCKED);
3856
3857 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
3858 break;
3859
3860 pmcid = sp.pm_pmcid;
3861
3862 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
3863 break;
3864
3865 KASSERT(pmcid == pm->pm_id,
3866 ("[pmc,%d] pmcid %x != id %x", __LINE__,
3867 pm->pm_id, pmcid));
3868
3869 if (pm->pm_state == PMC_STATE_RUNNING) /* already running */
3870 break;
3871 else if (pm->pm_state != PMC_STATE_STOPPED &&
3872 pm->pm_state != PMC_STATE_ALLOCATED) {
3873 error = EINVAL;
3874 break;
3875 }
3876
3877 error = pmc_start(pm);
3878 }
3879 break;
3880
3881
3882 /*
3883 * Stop a PMC.
3884 */
3885
3886 case PMC_OP_PMCSTOP:
3887 {
3888 pmc_id_t pmcid;
3889 struct pmc *pm;
3890 struct pmc_op_simple sp;
3891
3892 PMC_DOWNGRADE_SX();
3893
3894 if ((error = copyin(arg, &sp, sizeof(sp))) != 0)
3895 break;
3896
3897 pmcid = sp.pm_pmcid;
3898
3899 /*
3900 * Mark the PMC as inactive and invoke the MD stop
3901 * routines if needed.
3902 */
3903
3904 if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
3905 break;
3906
3907 KASSERT(pmcid == pm->pm_id,
3908 ("[pmc,%d] pmc id %x != pmcid %x", __LINE__,
3909 pm->pm_id, pmcid));
3910
3911 if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */
3912 break;
3913 else if (pm->pm_state != PMC_STATE_RUNNING) {
3914 error = EINVAL;
3915 break;
3916 }
3917
3918 error = pmc_stop(pm);
3919 }
3920 break;
3921
3922
3923 /*
3924 * Write a user supplied value to the log file.
3925 */
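	/*
	 * Illustrative sketch (not part of this driver): only a process
	 * that owns a log file may append user records.  The field name
	 * 'pm_userdata' and the 'pmc_syscall' wrapper are assumptions
	 * made for illustration.
	 *
	 *	struct pmc_op_writelog wl;
	 *
	 *	wl.pm_userdata = 0xdeadc0de;	(marker to find in the log)
	 *	error = pmc_syscall(PMC_OP_WRITELOG, &wl);
	 */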
3926
3927 case PMC_OP_WRITELOG:
3928 {
3929 struct pmc_op_writelog wl;
3930 struct pmc_owner *po;
3931
3932 PMC_DOWNGRADE_SX();
3933
3934 if ((error = copyin(arg, &wl, sizeof(wl))) != 0)
3935 break;
3936
3937 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
3938 error = EINVAL;
3939 break;
3940 }
3941
3942 if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) {
3943 error = EINVAL;
3944 break;
3945 }
3946
3947 error = pmclog_process_userlog(po, &wl);
3948 }
3949 break;
3950
3951
3952 default:
3953 error = EINVAL;
3954 break;
3955 }
3956
3957 if (is_sx_locked != 0) {
3958 if (is_sx_downgraded)
3959 sx_sunlock(&pmc_sx);
3960 else
3961 sx_xunlock(&pmc_sx);
3962 }
3963
3964 if (error)
3965 atomic_add_int(&pmc_stats.pm_syscall_errors, 1);
3966
3967 PICKUP_GIANT();
3968
3969 return error;
3970}
3971
3972/*
3973 * Helper functions
3974 */
3975
3976
3977/*
3978 * Mark the thread as needing callchain capture and post an AST. The
3979 * actual callchain capture will be done in a context where it is safe
3980 * to take page faults.
3981 */
3982
3983static void
3984pmc_post_callchain_callback(void)
3985{
3986 struct thread *td;
3987
3988 td = curthread;
3989
3990 /*
3991	 * If there are multiple PMCs for the same interrupt, ignore the new post
3992 */
3993 if (td->td_pflags & TDP_CALLCHAIN)
3994 return;
3995
3996 /*
3997 * Mark this thread as needing callchain capture.
3998 * `td->td_pflags' will be safe to touch because this thread
3999 * was in user space when it was interrupted.
4000 */
4001 td->td_pflags |= TDP_CALLCHAIN;
4002
4003 /*
4004 * Don't let this thread migrate between CPUs until callchain
4005 * capture completes.
4006 */
4007 sched_pin();
4008
4009 return;
4010}
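
/*
 * A sketch of the overall flow (as implemented by this function,
 * pmc_process_interrupt() and pmc_capture_user_callchain() below):
 *
 *	interrupt/NMI, sample taken in user mode
 *	    -> pmc_process_interrupt()		  reserve a sample slot
 *	        -> pmc_post_callchain_callback()  set TDP_CALLCHAIN, sched_pin()
 *	    ... return towards user mode, AST runs ...
 *	ast()
 *	    -> pmc_capture_user_callchain()	  walk the user stack, sched_unpin()
 */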
4011
4012/*
4013 * Interrupt processing.
4014 *
4015 * Find a free slot in the per-cpu array of samples and capture the
4016 * current callchain there. Whether or not a sample could be added,
4017 * a bit is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES
4018 * hook needs to be invoked from the clock handler.
4019 *
4020 * This function is meant to be called from an NMI handler. It cannot
4021 * use any of the locking primitives supplied by the OS.
4022 */
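
/*
 * A sketch of the per-sample protocol used below (derived from this
 * file; the markers' numeric values are not assumed):
 *
 *	ps_nsamples == PMC_SAMPLE_FREE	  slot available for writing
 *	ps_nsamples == PMC_SAMPLE_INUSE	  slot filled, user callchain still
 *					  to be captured at AST time
 *	ps_nsamples == N > 0		  N PC values ready; the slot will
 *					  be consumed by pmc_process_samples()
 *
 * The write pointer advances only after filling a FREE slot and wraps
 * at 'ps_fence'; pmc_process_samples() advances the read pointer the
 * same way.
 */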
4023
4024int
4025pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf,
4026 int inuserspace)
4027{
4028 int error, callchaindepth;
4029 struct thread *td;
4030 struct pmc_sample *ps;
4031 struct pmc_samplebuffer *psb;
4032
4033 error = 0;
4034
4035 /*
4036	 * Find a free slot in this CPU's sample buffer.
4037 */
4038 psb = pmc_pcpu[cpu]->pc_sb;
4039
4040 ps = psb->ps_write;
4041 if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
4042 pm->pm_stalled = 1;
4043 atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
4044 PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
4045 cpu, pm, (void *) tf, inuserspace,
4046 (int) (psb->ps_write - psb->ps_samples),
4047 (int) (psb->ps_read - psb->ps_samples));
4048 error = ENOMEM;
4049 goto done;
4050 }
4051
4052
4053 /* Fill in entry. */
4054 PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
4055 (void *) tf, inuserspace,
4056 (int) (psb->ps_write - psb->ps_samples),
4057 (int) (psb->ps_read - psb->ps_samples));
4058
4059 KASSERT(pm->pm_runcount >= 0,
4060 ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm,
4061 pm->pm_runcount));
4062
4063 atomic_add_rel_int(&pm->pm_runcount, 1); /* hold onto PMC */
4064 ps->ps_pmc = pm;
4065 if ((td = curthread) && td->td_proc)
4066 ps->ps_pid = td->td_proc->p_pid;
4067 else
4068 ps->ps_pid = -1;
4069 ps->ps_cpu = cpu;
4070 ps->ps_td = td;
4071 ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
4072
4073 callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
4074 pmc_callchaindepth : 1;
4075
4076 if (callchaindepth == 1)
4077 ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
4078 else {
4079 /*
4080 * Kernel stack traversals can be done immediately,
4081 * while we defer to an AST for user space traversals.
4082 */
4083 if (!inuserspace)
4084 callchaindepth =
4085 pmc_save_kernel_callchain(ps->ps_pc,
4086 callchaindepth, tf);
4087 else {
4088 pmc_post_callchain_callback();
4089 callchaindepth = PMC_SAMPLE_INUSE;
4090 }
4091 }
4092
4093 ps->ps_nsamples = callchaindepth; /* mark entry as in use */
4094
4095 /* increment write pointer, modulo ring buffer size */
4096 ps++;
4097 if (ps == psb->ps_fence)
4098 psb->ps_write = psb->ps_samples;
4099 else
4100 psb->ps_write = ps;
4101
4102 done:
4103 /* mark CPU as needing processing */
4104 CPU_SET_ATOMIC(cpu, &pmc_cpumask);
4105
4106 return (error);
4107}
4108
4109/*
4110 * Capture a user call chain. This function will be called from ast()
4111 * before control returns to userland and before the process gets
4112 * rescheduled.
4113 */
4114
4115static void
4116pmc_capture_user_callchain(int cpu, struct trapframe *tf)
4117{
4118 int i;
4119 struct pmc *pm;
4120 struct thread *td;
4121 struct pmc_sample *ps;
4122 struct pmc_samplebuffer *psb;
4123#ifdef INVARIANTS
4124 int ncallchains;
4125#endif
4126
4127 sched_unpin(); /* Can migrate safely now. */
4128
4129 psb = pmc_pcpu[cpu]->pc_sb;
4130 td = curthread;
4131
4132 KASSERT(td->td_pflags & TDP_CALLCHAIN,
4133 ("[pmc,%d] Retrieving callchain for thread that doesn't want it",
4134 __LINE__));
4135
4136#ifdef INVARIANTS
4137 ncallchains = 0;
4138#endif
4139
4140 /*
4141 * Iterate through all deferred callchain requests.
4142 */
4143
4144 ps = psb->ps_samples;
4145 for (i = 0; i < pmc_nsamples; i++, ps++) {
4146
4147 if (ps->ps_nsamples != PMC_SAMPLE_INUSE)
4148 continue;
4149 if (ps->ps_td != td)
4150 continue;
4151
4152 KASSERT(ps->ps_cpu == cpu,
4153 ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__,
4154 ps->ps_cpu, PCPU_GET(cpuid)));
4155
4156 pm = ps->ps_pmc;
4157
4158 KASSERT(pm->pm_flags & PMC_F_CALLCHAIN,
4159 ("[pmc,%d] Retrieving callchain for PMC that doesn't "
4160 "want it", __LINE__));
4161
4162 KASSERT(pm->pm_runcount > 0,
4163 ("[pmc,%d] runcount %d", __LINE__, pm->pm_runcount));
4164
4165 /*
4166 * Retrieve the callchain and mark the sample buffer
4167 * as 'processable' by the timer tick sweep code.
4168 */
4169 ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc,
4170 pmc_callchaindepth, tf);
4171
4172#ifdef INVARIANTS
4173 ncallchains++;
4174#endif
4175
4176 }
4177
4178 KASSERT(ncallchains > 0,
4179 ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
4180 cpu));
4181
4182 return;
4183}
4184
4185
4186/*
4187 * Process saved PC samples.
4188 */
4189
4190static void
4191pmc_process_samples(int cpu)
4192{
4193 struct pmc *pm;
4194 int adjri, n;
4195 struct thread *td;
4196 struct pmc_owner *po;
4197 struct pmc_sample *ps;
4198 struct pmc_classdep *pcd;
4199 struct pmc_samplebuffer *psb;
4200
4201 KASSERT(PCPU_GET(cpuid) == cpu,
4202 ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__,
4203 PCPU_GET(cpuid), cpu));
4204
4205 psb = pmc_pcpu[cpu]->pc_sb;
4206
4207 for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
4208
4209 ps = psb->ps_read;
4210 if (ps->ps_nsamples == PMC_SAMPLE_FREE)
4211 break;
4212 if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
4213 /* Need a rescan at a later time. */
4214 CPU_SET_ATOMIC(cpu, &pmc_cpumask);
4215 break;
4216 }
4217
4218 pm = ps->ps_pmc;
4219
4220 KASSERT(pm->pm_runcount > 0,
4221 ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm,
4222 pm->pm_runcount));
4223
4224 po = pm->pm_owner;
4225
4226 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
4227 ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__,
4228 pm, PMC_TO_MODE(pm)));
4229
4230 /* Ignore PMCs that have been switched off */
4231 if (pm->pm_state != PMC_STATE_RUNNING)
4232 goto entrydone;
4233
4234 PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
4235 pm, ps->ps_nsamples, ps->ps_flags,
4236 (int) (psb->ps_write - psb->ps_samples),
4237 (int) (psb->ps_read - psb->ps_samples));
4238
4239 /*
4240 * If this is a process-mode PMC that is attached to
4241 * its owner, and if the PC is in user mode, update
4242 * profiling statistics like timer-based profiling
4243 * would have done.
4244 */
4245 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
4246 if (ps->ps_flags & PMC_CC_F_USERSPACE) {
4247 td = FIRST_THREAD_IN_PROC(po->po_owner);
4248 addupc_intr(td, ps->ps_pc[0], 1);
4249 }
4250 goto entrydone;
4251 }
4252
4253 /*
4254 * Otherwise, this is either a sampling mode PMC that
4255 * is attached to a different process than its owner,
4256 * or a system-wide sampling PMC. Dispatch a log
4257 * entry to the PMC's owner process.
4258 */
4259
4260 pmclog_process_callchain(pm, ps);
4261
4262 entrydone:
4263 ps->ps_nsamples = 0; /* mark entry as free */
4264 atomic_subtract_rel_int(&pm->pm_runcount, 1);
4265
4266		/* increment read pointer, modulo ring buffer size */
4267 if (++ps == psb->ps_fence)
4268 psb->ps_read = psb->ps_samples;
4269 else
4270 psb->ps_read = ps;
4271 }
4272
4273 atomic_add_int(&pmc_stats.pm_log_sweeps, 1);
4274
4275 /* Do not re-enable stalled PMCs if we failed to process any samples */
4276 if (n == 0)
4277 return;
4278
4279 /*
4280 * Restart any stalled sampling PMCs on this CPU.
4281 *
4282 * If the NMI handler sets the pm_stalled field of a PMC after
4283 * the check below, we'll end up processing the stalled PMC at
4284 * the next hardclock tick.
4285 */
4286 for (n = 0; n < md->pmd_npmc; n++) {
4287 pcd = pmc_ri_to_classdep(md, n, &adjri);
4288 KASSERT(pcd != NULL,
4289 ("[pmc,%d] null pcd ri=%d", __LINE__, n));
4290 (void) (*pcd->pcd_get_config)(cpu,adjri,&pm);
4291
4292 if (pm == NULL || /* !cfg'ed */
4293 pm->pm_state != PMC_STATE_RUNNING || /* !active */
4294 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */
4295 pm->pm_stalled == 0) /* !stalled */
4296 continue;
4297
4298 pm->pm_stalled = 0;
4299 (*pcd->pcd_start_pmc)(cpu, adjri);
4300 }
4301}
4302
4303/*
4304 * Event handlers.
4305 */
4306
4307/*
4308 * Handle a process exit.
4309 *
4310 * Remove this process from all hash tables. If this process
4311 * owned any PMCs, turn off those PMCs and deallocate them,
4312 * removing any associations with target processes.
4313 *
4314 * This function will be called by the last 'thread' of a
4315 * process.
4316 *
4317 * XXX This eventhandler gets called early in the exit process.
4318 * Consider using a 'hook' invocation from thread_exit() or equivalent
4319 * spot. Another negative is that kse_exit doesn't seem to call
4320 * exit1() [??].
4321 *
4322 */
4323
4324static void
4325pmc_process_exit(void *arg __unused, struct proc *p)
4326{
4327 struct pmc *pm;
4328 int adjri, cpu;
4329 unsigned int ri;
4330 int is_using_hwpmcs;
4331 struct pmc_owner *po;
4332 struct pmc_process *pp;
4333 struct pmc_classdep *pcd;
4334 pmc_value_t newvalue, tmp;
4335
4336 PROC_LOCK(p);
4337 is_using_hwpmcs = p->p_flag & P_HWPMC;
4338 PROC_UNLOCK(p);
4339
4340 /*
4341 * Log a sysexit event to all SS PMC owners.
4342 */
4343 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
4344 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
4345 pmclog_process_sysexit(po, p->p_pid);
4346
4347 if (!is_using_hwpmcs)
4348 return;
4349
4350 PMC_GET_SX_XLOCK();
4351 PMCDBG(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid,
4352 p->p_comm);
4353
4354 /*
4355 * Since this code is invoked by the last thread in an exiting
4356 * process, we would have context switched IN at some prior
4357 * point. However, with PREEMPTION, kernel mode context
4358	 * switches may happen at any time, so we want to disable a
4359	 * context switch OUT until we get any PMCs targeting this
4360 * process off the hardware.
4361 *
4362 * We also need to atomically remove this process'
4363 * entry from our target process hash table, using
4364 * PMC_FLAG_REMOVE.
4365 */
4366 PMCDBG(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid,
4367 p->p_comm);
4368
4369 critical_enter(); /* no preemption */
4370
4371 cpu = curthread->td_oncpu;
4372
4373 if ((pp = pmc_find_process_descriptor(p,
4374 PMC_FLAG_REMOVE)) != NULL) {
4375
4376 PMCDBG(PRC,EXT,2,
4377 "process-exit proc=%p pmc-process=%p", p, pp);
4378
4379 /*
4380		 * The exiting process could be the target of
4381		 * some PMCs that will be running on the
4382		 * currently executing CPU.
4383 *
4384 * We need to turn these PMCs off like we
4385 * would do at context switch OUT time.
4386 */
4387 for (ri = 0; ri < md->pmd_npmc; ri++) {
4388
4389 /*
4390 * Pick up the pmc pointer from hardware
4391 * state similar to the CSW_OUT code.
4392 */
4393 pm = NULL;
4394
4395 pcd = pmc_ri_to_classdep(md, ri, &adjri);
4396
4397 (void) (*pcd->pcd_get_config)(cpu, adjri, &pm);
4398
4399 PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm);
4400
4401 if (pm == NULL ||
4402 !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
4403 continue;
4404
4405 PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
4406 "state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
4407 pm, pm->pm_state);
4408
4409 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
4410 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
4411 __LINE__, PMC_TO_ROWINDEX(pm), ri));
4412
4413 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
4414 ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
4415 __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc));
4416
4417 (void) pcd->pcd_stop_pmc(cpu, adjri);
4418
4419 KASSERT(pm->pm_runcount > 0,
4420 ("[pmc,%d] bad runcount ri %d rc %d",
4421 __LINE__, ri, pm->pm_runcount));
4422
4423 /* Stop hardware only if it is actually running */
4424 if (pm->pm_state == PMC_STATE_RUNNING &&
4425 pm->pm_stalled == 0) {
4426 pcd->pcd_read_pmc(cpu, adjri, &newvalue);
4427 tmp = newvalue -
4428 PMC_PCPU_SAVED(cpu,ri);
4429
4430 mtx_pool_lock_spin(pmc_mtxpool, pm);
4431 pm->pm_gv.pm_savedvalue += tmp;
4432 pp->pp_pmcs[ri].pp_pmcval += tmp;
4433 mtx_pool_unlock_spin(pmc_mtxpool, pm);
4434 }
4435
4436 atomic_subtract_rel_int(&pm->pm_runcount,1);
4437
4438 KASSERT((int) pm->pm_runcount >= 0,
4439		    ("[pmc,%d] runcount is %d", __LINE__, pm->pm_runcount));
4440
4441 (void) pcd->pcd_config_pmc(cpu, adjri, NULL);
4442 }
4443
4444 /*
4445 * Inform the MD layer of this pseudo "context switch
4446 * out"
4447 */
4448 (void) md->pmd_switch_out(pmc_pcpu[cpu], pp);
4449
4450 critical_exit(); /* ok to be pre-empted now */
4451
4452 /*
4453 * Unlink this process from the PMCs that are
4454		 * targeting it. This will send a signal to
4455		 * all PMC owners whose PMCs are orphaned.
4456 *
4457 * Log PMC value at exit time if requested.
4458 */
4459 for (ri = 0; ri < md->pmd_npmc; ri++)
4460 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
4461 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE &&
4462 PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)))
4463 pmclog_process_procexit(pm, pp);
4464 pmc_unlink_target_process(pm, pp);
4465 }
4466 free(pp, M_PMC);
4467
4468 } else
4469 critical_exit(); /* pp == NULL */
4470
4471
4472 /*
4473	 * If the process owned PMCs, release them and free up
4474	 * the associated memory.
4475 */
4476 if ((po = pmc_find_owner_descriptor(p)) != NULL) {
4477 pmc_remove_owner(po);
4478 pmc_destroy_owner_descriptor(po);
4479 }
4480
4481 sx_xunlock(&pmc_sx);
4482}
4483
4484/*
4485 * Handle a process fork.
4486 *
4487 * If the parent process 'p1' is under HWPMC monitoring, then copy
4488 * over any attached PMCs that have 'do_descendants' semantics.
4489 */
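
/*
 * Illustrative sketch (not part of this driver): descendant tracking
 * is requested when the PMC is allocated.  Using the libpmc wrappers
 * -- whose exact signatures are assumptions here -- a tool might do:
 *
 *	pmc_id_t pmcid;
 *
 *	pmc_allocate("<event-spec>", PMC_MODE_TC, PMC_F_DESCENDANTS,
 *	    PMC_CPU_ANY, &pmcid);
 *	pmc_attach(pmcid, target_pid);
 *
 * after which children forked by 'target_pid' inherit the PMC via the
 * handler below.
 */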
4490
4491static void
4492pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc,
4493 int flags)
4494{
4495 int is_using_hwpmcs;
4496 unsigned int ri;
4497 uint32_t do_descendants;
4498 struct pmc *pm;
4499 struct pmc_owner *po;
4500 struct pmc_process *ppnew, *ppold;
4501
4502 (void) flags; /* unused parameter */
4503
4504 PROC_LOCK(p1);
4505 is_using_hwpmcs = p1->p_flag & P_HWPMC;
4506 PROC_UNLOCK(p1);
4507
4508 /*
4509 * If there are system-wide sampling PMCs active, we need to
4510	 * log all fork events to their owners' logs.
4511 */
4512
4513 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
4514 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
4515 pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
4516
4517 if (!is_using_hwpmcs)
4518 return;
4519
4520 PMC_GET_SX_XLOCK();
4521 PMCDBG(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1,
4522 p1->p_pid, p1->p_comm, newproc);
4523
4524 /*
4525 * If the parent process (curthread->td_proc) is a
4526 * target of any PMCs, look for PMCs that are to be
4527 * inherited, and link these into the new process
4528 * descriptor.
4529 */
4530 if ((ppold = pmc_find_process_descriptor(curthread->td_proc,
4531 PMC_FLAG_NONE)) == NULL)
4532 goto done; /* nothing to do */
4533
4534 do_descendants = 0;
4535 for (ri = 0; ri < md->pmd_npmc; ri++)
4536 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL)
4537 do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS;
4538 if (do_descendants == 0) /* nothing to do */
4539 goto done;
4540
4541 /* allocate a descriptor for the new process */
4542 if ((ppnew = pmc_find_process_descriptor(newproc,
4543 PMC_FLAG_ALLOCATE)) == NULL)
4544 goto done;
4545
4546 /*
4547 * Run through all PMCs that were targeting the old process
4548 * and which specified F_DESCENDANTS and attach them to the
4549 * new process.
4550 *
4551 * Log the fork event to all owners of PMCs attached to this
4552 * process, if not already logged.
4553 */
4554 for (ri = 0; ri < md->pmd_npmc; ri++)
4555 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL &&
4556 (pm->pm_flags & PMC_F_DESCENDANTS)) {
4557 pmc_link_target_process(pm, ppnew);
4558 po = pm->pm_owner;
4559 if (po->po_sscount == 0 &&
4560 po->po_flags & PMC_PO_OWNS_LOGFILE)
4561 pmclog_process_procfork(po, p1->p_pid,
4562 newproc->p_pid);
4563 }
4564
4565 /*
4566 * Now mark the new process as being tracked by this driver.
4567 */
4568 PROC_LOCK(newproc);
4569 newproc->p_flag |= P_HWPMC;
4570 PROC_UNLOCK(newproc);
4571
4572 done:
4573 sx_xunlock(&pmc_sx);
4574}
4575
4576
4577/*
4578 * initialization
4579 */
4580
4581static const char *pmc_name_of_pmcclass[] = {
4582#undef __PMC_CLASS
4583#define __PMC_CLASS(N) #N ,
4584 __PMC_CLASSES()
4585};
4586
4587static int
4588pmc_initialize(void)
4589{
4590 int c, cpu, error, n, ri;
4591 unsigned int maxcpu;
4592 struct pmc_binding pb;
4593 struct pmc_sample *ps;
4594 struct pmc_classdep *pcd;
4595 struct pmc_samplebuffer *sb;
4596
4597 md = NULL;
4598 error = 0;
4599
4600#ifdef DEBUG
4601 /* parse debug flags first */
4602 if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags",
4603 pmc_debugstr, sizeof(pmc_debugstr)))
4604 pmc_debugflags_parse(pmc_debugstr,
4605 pmc_debugstr+strlen(pmc_debugstr));
4606#endif
4607
4608 PMCDBG(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION);
4609
4610 /* check kernel version */
4611 if (pmc_kernel_version != PMC_VERSION) {
4612 if (pmc_kernel_version == 0)
4613 printf("hwpmc: this kernel has not been compiled with "
4614 "'options HWPMC_HOOKS'.\n");
4615 else
4616 printf("hwpmc: kernel version (0x%x) does not match "
4617 "module version (0x%x).\n", pmc_kernel_version,
4618 PMC_VERSION);
4619 return EPROGMISMATCH;
4620 }
4621
4622 /*
4623 * check sysctl parameters
4624 */
4625
4626 if (pmc_hashsize <= 0) {
4627 (void) printf("hwpmc: tunable \"hashsize\"=%d must be "
4628 "greater than zero.\n", pmc_hashsize);
4629 pmc_hashsize = PMC_HASH_SIZE;
4630 }
4631
4632 if (pmc_nsamples <= 0 || pmc_nsamples > 65535) {
4633 (void) printf("hwpmc: tunable \"nsamples\"=%d out of "
4634 "range.\n", pmc_nsamples);
4635 pmc_nsamples = PMC_NSAMPLES;
4636 }
4637
4638 if (pmc_callchaindepth <= 0 ||
4639 pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
4640 (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of "
4641 "range.\n", pmc_callchaindepth);
4642 pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
4643 }
4644
4645 md = pmc_md_initialize();
4646
4647 if (md == NULL)
4648 return (ENOSYS);
4649
4650 KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1,
4651 ("[pmc,%d] no classes or pmcs", __LINE__));
4652
4653 /* Compute the map from row-indices to classdep pointers. */
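	/*
	 * Worked example (layout assumed for illustration): with two
	 * classes whose pcd_num values are 4 and 2, the map built below
	 * is
	 *
	 *	row index:	0   1   2   3   4   5
	 *	classdep:	c0  c0  c0  c0  c1  c1
	 *
	 * letting pmc_ri_to_classdep() turn a global row index into the
	 * owning class and a class-relative index.
	 */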
4654 pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) *
4655 md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO);
4656
4657 for (n = 0; n < md->pmd_npmc; n++)
4658 pmc_rowindex_to_classdep[n] = NULL;
4659 for (ri = c = 0; c < md->pmd_nclass; c++) {
4660 pcd = &md->pmd_classdep[c];
4661 for (n = 0; n < pcd->pcd_num; n++, ri++)
4662 pmc_rowindex_to_classdep[ri] = pcd;
4663 }
4664
4665 KASSERT(ri == md->pmd_npmc,
4666 ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__,
4667 ri, md->pmd_npmc));
4668
4669 maxcpu = pmc_cpu_max();
4670
4671 /* allocate space for the per-cpu array */
4672 pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC,
4673 M_WAITOK|M_ZERO);
4674
4675 /* per-cpu 'saved values' for managing process-mode PMCs */
4676 pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc,
4677 M_PMC, M_WAITOK);
4678
4679 /* Perform CPU-dependent initialization. */
4680 pmc_save_cpu_binding(&pb);
4681 error = 0;
4682 for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) {
4683 if (!pmc_cpu_is_active(cpu))
4684 continue;
4685 pmc_select_cpu(cpu);
4686 pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) +
4687 md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC,
4688 M_WAITOK|M_ZERO);
4689 if (md->pmd_pcpu_init)
4690 error = md->pmd_pcpu_init(md, cpu);
4691 for (n = 0; error == 0 && n < md->pmd_nclass; n++)
4692 error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu);
4693 }
4694 pmc_restore_cpu_binding(&pb);
4695
4696 if (error)
4697 return (error);
4698
4699 /* allocate space for the sample array */
4700 for (cpu = 0; cpu < maxcpu; cpu++) {
4701 if (!pmc_cpu_is_active(cpu))
4702 continue;
4703
4704 sb = malloc(sizeof(struct pmc_samplebuffer) +
4705 pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
4706 M_WAITOK|M_ZERO);
4707 sb->ps_read = sb->ps_write = sb->ps_samples;
4708 sb->ps_fence = sb->ps_samples + pmc_nsamples;
4709
4710 KASSERT(pmc_pcpu[cpu] != NULL,
4711 ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
4712
4713 sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples *
4714 sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO);
4715
4716 for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
4717 ps->ps_pc = sb->ps_callchains +
4718 (n * pmc_callchaindepth);
4719
4720 pmc_pcpu[cpu]->pc_sb = sb;
4721 }
4722
4723 /* allocate space for the row disposition array */
4724 pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc,
4725 M_PMC, M_WAITOK|M_ZERO);
4726
4727 KASSERT(pmc_pmcdisp != NULL,
4728 ("[pmc,%d] pmcdisp allocation returned NULL", __LINE__));
4729
4730 /* mark all PMCs as available */
4731 for (n = 0; n < (int) md->pmd_npmc; n++)
4732 PMC_MARK_ROW_FREE(n);
4733
4734 /* allocate thread hash tables */
4735 pmc_ownerhash = hashinit(pmc_hashsize, M_PMC,
4736 &pmc_ownerhashmask);
4737
4738 pmc_processhash = hashinit(pmc_hashsize, M_PMC,
4739 &pmc_processhashmask);
4740 mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf",
4741 MTX_SPIN);
4742
4743 LIST_INIT(&pmc_ss_owners);
4744 pmc_ss_count = 0;
4745
4746 /* allocate a pool of spin mutexes */
4747 pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size,
4748 MTX_SPIN);
4749
4750 PMCDBG(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx "
4751 "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask,
4752 pmc_processhash, pmc_processhashmask);
4753
4754 /* register process {exit,fork,exec} handlers */
4755 pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit,
4756 pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY);
4757 pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork,
4758 pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY);
4759
4760 /* initialize logging */
4761 pmclog_initialize();
4762
4763 /* set hook functions */
4764 pmc_intr = md->pmd_intr;
4765 pmc_hook = pmc_hook_handler;
4766
4767 if (error == 0) {
4768 printf(PMC_MODULE_NAME ":");
4769 for (n = 0; n < (int) md->pmd_nclass; n++) {
4770 pcd = &md->pmd_classdep[n];
4771 printf(" %s/%d/%d/0x%b",
4772 pmc_name_of_pmcclass[pcd->pcd_class],
4773 pcd->pcd_num,
4774 pcd->pcd_width,
4775 pcd->pcd_caps,
4776 "\20"
4777 "\1INT\2USR\3SYS\4EDG\5THR"
4778 "\6REA\7WRI\10INV\11QUA\12PRC"
4779 "\13TAG\14CSC");
4780 }
4781 printf("\n");
4782 }
4783
4784 return (error);
4785}
4786
4787/* prepare to be unloaded */
4788static void
4789pmc_cleanup(void)
4790{
4791 int c, cpu;
4792 unsigned int maxcpu;
4793 struct pmc_ownerhash *ph;
4794 struct pmc_owner *po, *tmp;
4795 struct pmc_binding pb;
4796#ifdef DEBUG
4797 struct pmc_processhash *prh;
4798#endif
4799
4800 PMCDBG(MOD,INI,0, "%s", "cleanup");
4801
4802 /* switch off sampling */
4803 CPU_ZERO(&pmc_cpumask);
4804 pmc_intr = NULL;
4805
4806 sx_xlock(&pmc_sx);
4807 if (pmc_hook == NULL) { /* being unloaded already */
4808 sx_xunlock(&pmc_sx);
4809 return;
4810 }
4811
4812 pmc_hook = NULL; /* prevent new threads from entering module */
4813
4814 /* deregister event handlers */
4815 EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag);
4816 EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag);
4817
4818	/* send SIGBUS to all owner processes, free up allocations */
4819 if (pmc_ownerhash)
4820 for (ph = pmc_ownerhash;
4821 ph <= &pmc_ownerhash[pmc_ownerhashmask];
4822 ph++) {
4823 LIST_FOREACH_SAFE(po, ph, po_next, tmp) {
4824 pmc_remove_owner(po);
4825
4826 /* send SIGBUS to owner processes */
4827 PMCDBG(MOD,INI,2, "cleanup signal proc=%p "
4828 "(%d, %s)", po->po_owner,
4829 po->po_owner->p_pid,
4830 po->po_owner->p_comm);
4831
4832 PROC_LOCK(po->po_owner);
4833 kern_psignal(po->po_owner, SIGBUS);
4834 PROC_UNLOCK(po->po_owner);
4835
4836 pmc_destroy_owner_descriptor(po);
4837 }
4838 }
4839
4840 /* reclaim allocated data structures */
4841 if (pmc_mtxpool)
4842 mtx_pool_destroy(&pmc_mtxpool);
4843
4844 mtx_destroy(&pmc_processhash_mtx);
4845 if (pmc_processhash) {
4846#ifdef DEBUG
4847 struct pmc_process *pp;
4848
4849 PMCDBG(MOD,INI,3, "%s", "destroy process hash");
4850 for (prh = pmc_processhash;
4851 prh <= &pmc_processhash[pmc_processhashmask];
4852 prh++)
4853 LIST_FOREACH(pp, prh, pp_next)
4854 PMCDBG(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid);
4855#endif
4856
4857 hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask);
4858 pmc_processhash = NULL;
4859 }
4860
4861 if (pmc_ownerhash) {
4862 PMCDBG(MOD,INI,3, "%s", "destroy owner hash");
4863 hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask);
4864 pmc_ownerhash = NULL;
4865 }
4866
4867 KASSERT(LIST_EMPTY(&pmc_ss_owners),
4868 ("[pmc,%d] Global SS owner list not empty", __LINE__));
4869 KASSERT(pmc_ss_count == 0,
4870 ("[pmc,%d] Global SS count not empty", __LINE__));
4871
4872 /* do processor and pmc-class dependent cleanup */
4873 maxcpu = pmc_cpu_max();
4874
4875 PMCDBG(MOD,INI,3, "%s", "md cleanup");
4876 if (md) {
4877 pmc_save_cpu_binding(&pb);
4878 for (cpu = 0; cpu < maxcpu; cpu++) {
4879 PMCDBG(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p",
4880 cpu, pmc_pcpu[cpu]);
4881 if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL)
4882 continue;
4883 pmc_select_cpu(cpu);
4884 for (c = 0; c < md->pmd_nclass; c++)
4885 md->pmd_classdep[c].pcd_pcpu_fini(md, cpu);
4886 if (md->pmd_pcpu_fini)
4887 md->pmd_pcpu_fini(md, cpu);
4888 }
4889
4890 pmc_md_finalize(md);
4891
4892 free(md, M_PMC);
4893 md = NULL;
4894 pmc_restore_cpu_binding(&pb);
4895 }
4896
4897 /* Free per-cpu descriptors. */
4898 for (cpu = 0; cpu < maxcpu; cpu++) {
4899 if (!pmc_cpu_is_active(cpu))
4900 continue;
4901 KASSERT(pmc_pcpu[cpu]->pc_sb != NULL,
4902 ("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__,
4903 cpu));
4904 free(pmc_pcpu[cpu]->pc_sb->ps_callchains, M_PMC);
4905 free(pmc_pcpu[cpu]->pc_sb, M_PMC);
4906 free(pmc_pcpu[cpu], M_PMC);
4907 }
4908
4909 free(pmc_pcpu, M_PMC);
4910 pmc_pcpu = NULL;
4911
4912 free(pmc_pcpu_saved, M_PMC);
4913 pmc_pcpu_saved = NULL;
4914
4915 if (pmc_pmcdisp) {
4916 free(pmc_pmcdisp, M_PMC);
4917 pmc_pmcdisp = NULL;
4918 }
4919
4920 if (pmc_rowindex_to_classdep) {
4921 free(pmc_rowindex_to_classdep, M_PMC);
4922 pmc_rowindex_to_classdep = NULL;
4923 }
4924
4925 pmclog_shutdown();
4926
4927 sx_xunlock(&pmc_sx); /* we are done */
4928}
4929
4930/*
4931 * The function called at load/unload.
4932 */
4933
4934static int
4935load (struct module *module __unused, int cmd, void *arg __unused)
4936{
4937 int error;
4938
4939 error = 0;
4940
4941 switch (cmd) {
4942 case MOD_LOAD :
4943 /* initialize the subsystem */
4944 error = pmc_initialize();
4945 if (error != 0)
4946 break;
4947 PMCDBG(MOD,INI,1, "syscall=%d maxcpu=%d",
4948 pmc_syscall_num, pmc_cpu_max());
4949 break;
4950
4951
4952 case MOD_UNLOAD :
4953 case MOD_SHUTDOWN:
4954 pmc_cleanup();
4955 PMCDBG(MOD,INI,1, "%s", "unloaded");
4956 break;
4957
4958 default :
4959 error = EINVAL; /* XXX should panic(9) */
4960 break;
4961 }
4962
4963 return error;
4964}
4965
4966/* memory pool */
4967MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");