pmc.h revision 157217
1/*-
2 * Copyright (c) 2003-2006, Joseph Koshy
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/sys/pmc.h 157217 2006-03-28 16:20:29Z jkoshy $
27 */
28
29#ifndef _SYS_PMC_H_
30#define	_SYS_PMC_H_
31
32#include <dev/hwpmc/pmc_events.h>
33
34#include <machine/pmc_mdep.h>
35#include <machine/profile.h>
36
/* Module name; also used to build PMC_SYSCTL_NAME_PREFIX. */
#define	PMC_MODULE_NAME		"hwpmc"

/* HW counter name size (dimension of the pm_name[] / pd_name[] arrays). */
#define	PMC_NAME_MAX		16

/* #classes of PMCs in a system (dimension of the per-class arrays). */
#define	PMC_CLASS_MAX		4
40
/*
 * Kernel<->userland API version number, packed as [MMmmpppp]:
 * the major number occupies bits 24 and up, the minor number bits 16..23
 * and the patch level the low 16 bits.
 *
 * Major: incremented when an incompatible change to the ABI occurs that
 *        older clients will not be able to handle.
 * Minor: incremented when a backwards compatible change occurs that
 *        allows older correct programs to run unchanged, for example
 *        when support for a new PMC type is added.
 * Patch: incremented for every bug fix.
 */
#define	PMC_VERSION_MAJOR	0x02
#define	PMC_VERSION_MINOR	0x00
#define	PMC_VERSION_PATCH	0x0000

#define	PMC_VERSION							\
	((PMC_VERSION_MAJOR << 24) | (PMC_VERSION_MINOR << 16) |	\
	PMC_VERSION_PATCH)
59
/*
 * Kinds of CPUs known
 *
 * Each entry is __PMC_CPU(SYMBOL, DESCRIPTION).  Consumers define
 * __PMC_CPU() to pick the parts they need and then expand __PMC_CPUS();
 * the enumeration below turns every SYMBOL into PMC_CPU_<SYMBOL>,
 * numbered from zero in table order.
 */

#define	__PMC_CPUS()						\
	__PMC_CPU(AMD_K7,	"AMD K7")			\
	__PMC_CPU(AMD_K8,	"AMD K8")			\
	__PMC_CPU(INTEL_P5,	"Intel Pentium")		\
	__PMC_CPU(INTEL_P6,	"Intel Pentium Pro")		\
	__PMC_CPU(INTEL_CL,	"Intel Celeron")		\
	__PMC_CPU(INTEL_PII,	"Intel Pentium II")		\
	__PMC_CPU(INTEL_PIII,	"Intel Pentium III")		\
	__PMC_CPU(INTEL_PM,	"Intel Pentium M")		\
	__PMC_CPU(INTEL_PIV,	"Intel Pentium IV")

enum pmc_cputype {
#undef	__PMC_CPU
#define	__PMC_CPU(SYM,DESCR)	PMC_CPU_##SYM ,
	__PMC_CPUS()
};

/* First and last entries of the table above, for range checks and loops. */
#define	PMC_CPU_FIRST	PMC_CPU_AMD_K7
#define	PMC_CPU_LAST	PMC_CPU_INTEL_PIV
83
/*
 * Classes of PMCs
 *
 * Each __PMC_CLASS(NAME) entry becomes the enumerator PMC_CLASS_<NAME>,
 * numbered from zero in table order.
 */

#define	__PMC_CLASSES()							\
	__PMC_CLASS(TSC)	/* CPU Timestamp counter */		\
	__PMC_CLASS(K7)		/* AMD K7 performance counters */	\
	__PMC_CLASS(K8)		/* AMD K8 performance counters */	\
	__PMC_CLASS(P5)		/* Intel Pentium counters */		\
	__PMC_CLASS(P6)		/* Intel Pentium Pro counters */	\
	__PMC_CLASS(P4)		/* Intel Pentium-IV counters */

enum pmc_class {
#undef	__PMC_CLASS
#define	__PMC_CLASS(NAME)	PMC_CLASS_##NAME ,
	__PMC_CLASSES()
};

/* First and last entries of the table above. */
#define	PMC_CLASS_FIRST	PMC_CLASS_TSC
#define	PMC_CLASS_LAST	PMC_CLASS_P4
104
/*
 * A PMC can be in the following states:
 *
 * Hardware states:
 *   DISABLED   -- administratively prohibited from being used.
 *   FREE       -- HW available for use
 * Software states:
 *   ALLOCATED  -- allocated
 *   STOPPED    -- allocated, but not counting events
 *   RUNNING    -- allocated, and in operation; 'pm_runcount'
 *                 holds the number of CPUs using this PMC at
 *                 a given instant
 *   DELETED    -- being destroyed
 *
 * The enumeration lists the hardware states first, then the software
 * states, and ends with PMC_STATE_MAX (one past PMC_STATE_LAST).
 */

#define	__PMC_HWSTATES()		\
	__PMC_STATE(DISABLED)		\
	__PMC_STATE(FREE)

#define	__PMC_SWSTATES()		\
	__PMC_STATE(ALLOCATED)		\
	__PMC_STATE(STOPPED)		\
	__PMC_STATE(RUNNING)		\
	__PMC_STATE(DELETED)

#define	__PMC_STATES()			\
	__PMC_HWSTATES()		\
	__PMC_SWSTATES()

enum pmc_state {
#undef	__PMC_STATE
#define	__PMC_STATE(NAME)	PMC_STATE_##NAME,
	__PMC_STATES()
	__PMC_STATE(MAX)
};

#define	PMC_STATE_FIRST	PMC_STATE_DISABLED
#define	PMC_STATE_LAST	PMC_STATE_DELETED
143
/*
 * An allocated PMC may be used as a 'global' counter or as a
 * 'thread-private' one.  Each such mode of use can be in either
 * statistical sampling mode or in counting mode.  Thus a PMC in use
 * is in one of the following four modes:
 *
 * SS i.e., SYSTEM STATISTICAL  -- system-wide statistical profiling
 * SC i.e., SYSTEM COUNTER      -- system-wide counting mode
 * TS i.e., THREAD STATISTICAL  -- thread virtual, statistical profiling
 * TC i.e., THREAD COUNTER      -- thread virtual, counting mode
 *
 * Statistical profiling modes rely on the PMC periodically delivering
 * an interrupt to the CPU (when the configured number of events have
 * been measured), so the PMC must have the ability to generate
 * interrupts.
 *
 * In counting modes, the PMC counts its configured events, with the
 * value of the PMC being read whenever needed by its owner process.
 *
 * The thread specific modes "virtualize" the PMCs -- the PMCs appear
 * to be thread private and count events only when the profiled thread
 * actually executes on the CPU.
 *
 * The system-wide "global" modes keep the PMCs running all the time
 * and are used to measure the behaviour of the whole system.
 */

/* __PMC_MODE(NAME, VALUE): VALUE is the explicit enumerator value. */
#define	__PMC_MODES()			\
	__PMC_MODE(SS,	0)		\
	__PMC_MODE(SC,	1)		\
	__PMC_MODE(TS,	2)		\
	__PMC_MODE(TC,	3)

enum pmc_mode {
#undef	__PMC_MODE
#define	__PMC_MODE(NAME,VALUE)	PMC_MODE_##NAME = VALUE,
	__PMC_MODES()
};

#define	PMC_MODE_FIRST	PMC_MODE_SS
#define	PMC_MODE_LAST	PMC_MODE_TC

/*
 * Mode predicates.  Each one names the two modes it accepts; any other
 * value yields false.  The argument may be evaluated more than once.
 */
#define	PMC_IS_COUNTING_MODE(m)					\
	((m) == PMC_MODE_SC || (m) == PMC_MODE_TC)
#define	PMC_IS_SYSTEM_MODE(m)					\
	((m) == PMC_MODE_SS || (m) == PMC_MODE_SC)
#define	PMC_IS_SAMPLING_MODE(m)					\
	((m) == PMC_MODE_SS || (m) == PMC_MODE_TS)
#define	PMC_IS_VIRTUAL_MODE(m)					\
	((m) == PMC_MODE_TS || (m) == PMC_MODE_TC)
193
/*
 * PMC row disposition
 *
 * Each __PMC_DISP(NAME) entry becomes the enumerator PMC_DISP_<NAME>.
 * UNKNOWN is a sentinel and lies outside PMC_DISP_FIRST..PMC_DISP_LAST.
 * The macro parameter 'N' is not used by the table.
 */

#define	__PMC_DISPOSITIONS(N)						\
	__PMC_DISP(STANDALONE)	/* global/disabled counters */		\
	__PMC_DISP(FREE)	/* free/available */			\
	__PMC_DISP(THREAD)	/* thread-virtual PMCs */		\
	__PMC_DISP(UNKNOWN)	/* sentinel */

enum pmc_disp {
#undef	__PMC_DISP
#define	__PMC_DISP(NAME)	PMC_DISP_##NAME ,
	__PMC_DISPOSITIONS()
};

#define	PMC_DISP_FIRST	PMC_DISP_STANDALONE
#define	PMC_DISP_LAST	PMC_DISP_THREAD
212
/*
 * Counter capabilities
 *
 * Each entry is __PMC_CAP(NAME, BIT, DESCRIPTION).  The enumeration
 * below defines PMC_CAP_<NAME> as the single-bit mask (1 << BIT), so
 * capabilities can be OR-ed together (see the 'pm_caps' fields).
 */

#define	__PMC_CAPS()							\
	__PMC_CAP(INTERRUPT,	0, "generate interrupts")		\
	__PMC_CAP(USER,		1, "count user-mode events")		\
	__PMC_CAP(SYSTEM,	2, "count system-mode events")		\
	__PMC_CAP(EDGE,		3, "do edge detection of events")	\
	__PMC_CAP(THRESHOLD,	4, "ignore events below a threshold")	\
	__PMC_CAP(READ,		5, "read PMC counter")			\
	__PMC_CAP(WRITE,	6, "reprogram PMC counter")		\
	__PMC_CAP(INVERT,	7, "invert comparision sense")		\
	__PMC_CAP(QUALIFIER,	8, "further qualify monitored events")	\
	__PMC_CAP(PRECISE,	9, "perform precise sampling")		\
	__PMC_CAP(TAGGING,	10, "tag upstream events")		\
	__PMC_CAP(CASCADE,	11, "cascade counters")

enum pmc_caps {
#undef	__PMC_CAP
#define	__PMC_CAP(NAME, BIT, DESCR)	PMC_CAP_##NAME = (1 << BIT) ,
	__PMC_CAPS()
};

#define	PMC_CAP_FIRST		PMC_CAP_INTERRUPT
#define	PMC_CAP_LAST		PMC_CAP_CASCADE
242
243/*
244 * PMC Event Numbers
245 *
246 * These are generated from the definitions in "dev/hwpmc/pmc_events.h".
247 */
248
249enum pmc_event {
250#undef	__PMC_EV
251#define	__PMC_EV(C,N,D) PMC_EV_ ## C ## _ ## N ,
252	__PMC_EVENTS()
253};
254
255#define	PMC_EVENT_FIRST	PMC_EV_TSC_TSC
256#define	PMC_EVENT_LAST	PMC_EV_P5_LAST
257
258/*
259 * PMC SYSCALL INTERFACE
260 */
261
/*
 * "PMC_OPS" -- these are the commands recognized by the kernel
 * module, and are used when performing a system call from userland.
 *
 * Each entry is __PMC_OP(NAME, DESCRIPTION).  The enumeration below
 * defines PMC_OP_<NAME>, numbered from zero in table order; consumers
 * may redefine __PMC_OP() and expand __PMC_OPS() again to obtain the
 * matching description strings.
 */

#define	__PMC_OPS()							\
	__PMC_OP(CONFIGURELOG, "Set log file")				\
	__PMC_OP(FLUSHLOG, "Flush log file")				\
	__PMC_OP(GETCPUINFO, "Get system CPU information")		\
	__PMC_OP(GETDRIVERSTATS, "Get driver statistics")		\
	__PMC_OP(GETMODULEVERSION, "Get module version")		\
	__PMC_OP(GETPMCINFO, "Get per-cpu PMC information")		\
	__PMC_OP(PMCADMIN, "Set PMC state")				\
	__PMC_OP(PMCALLOCATE, "Allocate and configure a PMC")		\
	__PMC_OP(PMCATTACH, "Attach a PMC to a process")		\
	__PMC_OP(PMCDETACH, "Detach a PMC from a process")		\
	__PMC_OP(PMCGETMSR, "Get a PMC's hardware address")		\
	__PMC_OP(PMCRELEASE, "Release a PMC")				\
	__PMC_OP(PMCRW, "Read/Set a PMC")				\
	__PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate")	\
	__PMC_OP(PMCSTART, "Start a PMC")				\
	__PMC_OP(PMCSTOP, "Stop a PMC")					\
	__PMC_OP(WRITELOG, "Write a cookie to the log file")

enum pmc_ops {
#undef	__PMC_OP
#define	__PMC_OP(N, D)	PMC_OP_##N,
	__PMC_OPS()
};
291
292
/*
 * Flags used in operations on PMCs.
 *
 * The "OP <name>" tag on each flag names the operation it applies to.
 * Caller-visible flags use the low bits; flags from bit 16 upward are
 * internal.
 */

/* OP ADMIN */
#define	PMC_F_FORCE		0x00000001	/* force operation */

/* OP ALLOCATE */
#define	PMC_F_DESCENDANTS	0x00000002	/* track descendants */
#define	PMC_F_LOG_PROCCSW	0x00000004	/* track ctx switches */
#define	PMC_F_LOG_PROCEXIT	0x00000008	/* log proc exits */

/* OP RW */
#define	PMC_F_NEWVALUE		0x00000010	/* write new value */
#define	PMC_F_OLDVALUE		0x00000020	/* get old value */

/* OP ALLOCATE */
#define	PMC_F_KGMON		0x00000040	/* kgmon(8) profiling */

/* internal flags */
#define	PMC_F_ATTACHED_TO_OWNER	0x00010000	/* attached to owner */
#define	PMC_F_NEEDS_LOGFILE	0x00020000	/* needs log file */
#define	PMC_F_ATTACH_DONE	0x00040000	/* attached at least once */
309
/*
 * Cookies used to denote allocated PMCs, and the values of PMCs.
 */

typedef uint32_t	pmc_id_t;	/* cookie naming an allocated PMC */
typedef uint64_t	pmc_value_t;	/* value of a PMC */

/* The all-ones cookie never names a PMC. */
#define	PMC_ID_INVALID		(~(pmc_id_t)0)
318
/*
 * PMC IDs have the following format:
 *
 * +--------+----------+-----------+-----------+
 * |   CPU  | PMC MODE | PMC CLASS | ROW INDEX |
 * +--------+----------+-----------+-----------+
 *
 * where each field is 8 bits wide.  Field 'CPU' is set to the
 * requested CPU for system-wide PMCs or PMC_CPU_ANY for process-mode
 * PMCs.  Field 'PMC MODE' is the allocated PMC mode.  Field 'PMC
 * CLASS' is the class of the PMC.  Field 'ROW INDEX' is the row index
 * for the PMC.
 *
 * The 'ROW INDEX' ranges over the NHWPMCS hardware PMCs on this cpu,
 * where NHWPMCS is the total number of hardware PMCs per cpu.
 */

/* Field extractors: mask the 8-bit field and shift it down to bit 0. */
#define	PMC_ID_TO_ROWINDEX(ID)	((ID) & 0xFF)
#define	PMC_ID_TO_CLASS(ID)	(((ID) & 0xFF00) >> 8)
#define	PMC_ID_TO_MODE(ID)	(((ID) & 0xFF0000) >> 16)
#define	PMC_ID_TO_CPU(ID)	(((ID) & 0xFF000000) >> 24)

/*
 * Compose an ID from its four fields; each argument is truncated to
 * its low 8 bits.  The arguments are converted to uint32_t before
 * shifting so that a CPU field with its top bit set (e.g. PMC_CPU_ANY,
 * which truncates to 0xFF) is never shifted into the sign bit of a
 * signed int; the result has type uint32_t.
 */
#define	PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX)				\
	((((uint32_t)(CPU) & 0xFF) << 24) |				\
	(((uint32_t)(MODE) & 0xFF) << 16) |				\
	(((uint32_t)(CLASS) & 0xFF) << 8) |				\
	((uint32_t)(ROWINDEX) & 0xFF))
344
345/*
346 * Data structures for system calls supported by the pmc driver.
347 */
348
349/*
350 * OP PMCALLOCATE
351 *
352 * Allocate a PMC on the named CPU.
353 */
354
/*
 * Wildcard stored in a CPU field in place of a CPU number.  The
 * replacement list is parenthesized so that it expands as one operand
 * in any expression context.
 */
#define	PMC_CPU_ANY	(~0)
356
/*
 * Argument block for OP PMCALLOCATE.  The caller describes the PMC it
 * wants; 'pm_pmcid' is the field marked as returned to the caller.
 */
struct pmc_op_pmcallocate {
	uint32_t	pm_caps;	/* PMC_CAP_* */
	uint32_t	pm_cpu;		/* CPU number or PMC_CPU_ANY */
	enum pmc_class	pm_class;	/* class of PMC desired */
	enum pmc_event	pm_ev;		/* [enum pmc_event] desired */
	uint32_t	pm_flags;	/* additional modifiers PMC_F_* */
	enum pmc_mode	pm_mode;	/* desired mode */
	pmc_id_t	pm_pmcid;	/* [return] process pmc id */

	/* declared by the machine dependent header <machine/pmc_mdep.h> */
	union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */
};
368
/*
 * OP PMCADMIN
 *
 * Set the administrative state (i.e., whether enabled or disabled) of
 * a PMC 'pm_pmc' on CPU 'pm_cpu'.  Note that 'pm_pmc' specifies an
 * absolute PMC number and need not have been first allocated by the
 * calling process.
 */

struct pmc_op_pmcadmin {
	int		pm_cpu;		/* CPU# */
	uint32_t	pm_flags;	/* flags (PMC_F_FORCE is the OP ADMIN flag) */
	int		pm_pmc;         /* PMC# (absolute number, not a pmc_id_t) */
	enum pmc_state  pm_state;	/* desired state */
};
384
/*
 * OP PMCATTACH / OP PMCDETACH
 *
 * Attach a PMC to, or detach it from, a process.  Both operations use
 * this argument block.
 */

struct pmc_op_pmcattach {
	pmc_id_t	pm_pmc;		/* PMC to attach to */
	pid_t		pm_pid;		/* target process */
};
395
/*
 * OP PMCSETCOUNT
 *
 * Set the sampling rate (i.e., the reload count) for statistical counters.
 * 'pm_pmcid' needs to have been previously allocated using PMCALLOCATE.
 */

struct pmc_op_pmcsetcount {
	pmc_value_t	pm_count;	/* initial/sample count */
	pmc_id_t	pm_pmcid;	/* PMC id to set */
};
407
408
/*
 * OP PMCRW
 *
 * Read and/or set the value of a PMC named by 'pm_pmcid'; 'pm_flags'
 * selects the direction (PMC_F_OLDVALUE: get old value, PMC_F_NEWVALUE:
 * write new value).  'pm_pmcid' needs to have been previously allocated
 * using PMCALLOCATE.
 */


struct pmc_op_pmcrw {
	uint32_t	pm_flags;	/* PMC_F_{OLD,NEW}VALUE*/
	pmc_id_t	pm_pmcid;	/* pmc id */
	pmc_value_t	pm_value;	/* new&returned value */
};
422
423
/*
 * OP GETPMCINFO
 *
 * Retrieve PMC state for a named CPU.  The caller is expected to
 * allocate space for 'npmc' 'struct pmc_info' entries (i.e.,
 * npmc * sizeof(struct pmc_info) bytes) for the return values.
 */

/* State of one PMC, as reported to userland. */
struct pmc_info {
	char		pm_name[PMC_NAME_MAX]; /* pmc name */
	enum pmc_class	pm_class;	/* enum pmc_class */
	int		pm_enabled;	/* whether enabled */
	enum pmc_disp	pm_rowdisp;	/* FREE, THREAD or STANDALONE */
	pid_t		pm_ownerpid;	/* owner, or -1 */
	enum pmc_mode	pm_mode;	/* current mode [enum pmc_mode] */
	enum pmc_event	pm_event;	/* current event */
	uint32_t	pm_flags;	/* current flags */
	pmc_value_t	pm_reloadcount;	/* sampling counters only */
};

/* Argument block: the CPU queried, followed by the per-PMC results. */
struct pmc_op_getpmcinfo {
	int32_t		pm_cpu;		/* 0 <= cpu < mp_maxid */
	struct pmc_info	pm_pmcs[];	/* space for 'npmc' structures */
};
448
449
/*
 * OP GETCPUINFO
 *
 * Retrieve system CPU information.
 */

/* Description of one class of PMCs. */
struct pmc_classinfo {
	enum pmc_class	pm_class; 	/* class id */
	uint32_t	pm_caps;	/* counter capabilities */
	uint32_t	pm_width;	/* width of the PMC */
};

/*
 * Argument block.  'pm_classes[]' has room for PMC_CLASS_MAX entries;
 * 'pm_nclass' gives the number of classes of PMCs.
 */
struct pmc_op_getcpuinfo {
	enum pmc_cputype pm_cputype; /* what kind of CPU */
	uint32_t	pm_ncpu;    /* number of CPUs */
	uint32_t	pm_npmc;    /* #PMCs per CPU */
	uint32_t	pm_nclass;  /* #classes of PMCs */
	struct pmc_classinfo  pm_classes[PMC_CLASS_MAX];
};
469
/*
 * OP CONFIGURELOG
 *
 * Configure a log file for writing system-wide statistics to.
 */

struct pmc_op_configurelog {
	int		pm_flags;   /* NOTE(review): flag values are not defined in this header */
	int		pm_logfd;   /* logfile fd (or -1) */
};
480
/*
 * OP GETDRIVERSTATS
 *
 * Retrieve pmc(4) driver-wide statistics.  Every field is a counter;
 * the kernel keeps one instance of this structure as 'pmc_stats'.
 */

struct pmc_op_getdriverstats {
	int	pm_intr_ignored;	/* #interrupts ignored */
	int	pm_intr_processed;	/* #interrupts processed */
	int	pm_intr_bufferfull;	/* #interrupts with ENOSPC */
	int	pm_syscalls;		/* #syscalls */
	int	pm_syscall_errors;	/* #syscalls with errors */
	int	pm_buffer_requests;	/* #buffer requests */
	int	pm_buffer_requests_failed; /* #failed buffer requests */
	int	pm_log_sweeps;		/* #sample buffer processing passes */
};
497
/*
 * OP RELEASE / OP START / OP STOP
 *
 * Simple operations on a PMC id: the id is the only argument.
 */

struct pmc_op_simple {
	pmc_id_t	pm_pmcid;	/* PMC id to operate on */
};
507
/*
 * OP WRITELOG
 *
 * Flush the current log buffer and write 4 bytes of user data to it.
 */

struct pmc_op_writelog {
	uint32_t	pm_userdata;	/* the 4 bytes of user data */
};
517
/*
 * OP GETMSR
 *
 * Retrieve the machine specific address associated with the allocated
 * PMC.  This number can be used subsequently with a read-performance-counter
 * instruction.
 */

struct pmc_op_getmsr {
	uint32_t	pm_msr;		/* machine specific address */
	pmc_id_t	pm_pmcid;	/* allocated pmc id */
};
530
531#ifdef _KERNEL
532
533#include <sys/malloc.h>
534#include <sys/sysctl.h>
535
/*
 * Kernel-side sizing constants.
 * NOTE(review): units and whether these are tunable defaults are not
 * visible in this header -- confirm against the driver sources.
 */
#define	PMC_HASH_SIZE			16
#define	PMC_MTXPOOL_SIZE		32
#define	PMC_LOG_BUFFER_SIZE		4
#define	PMC_NLOGBUFFERS			16
#define	PMC_NSAMPLES			32

/* Name prefix built from the module name: "kern." PMC_MODULE_NAME ".". */
#define	PMC_SYSCTL_NAME_PREFIX	"kern." PMC_MODULE_NAME "."
543
544/*
545 * Locking keys
546 *
547 * (b) - pmc_bufferlist_mtx (spin lock)
548 * (k) - pmc_kthread_mtx (sleep lock)
549 * (o) - po->po_mtx (spin lock)
550 */
551
/*
 * PMC commands
 *
 * Arguments of the PMC system call: an operation code and a pointer to
 * that operation's parameter.
 */

struct pmc_syscall_args {
	uint32_t	pmop_code;	/* one of PMC_OP_* */
	void		*pmop_data;	/* syscall parameter */
};
560
/*
 * Interface to processor specific stuff
 */
564
/*
 * struct pmc_descr
 *
 * Machine independent (i.e., the common parts) of a human readable
 * PMC description.
 */

struct pmc_descr {
	char		pd_name[PMC_NAME_MAX]; /* name */
	uint32_t	pd_caps;	/* capabilities (PMC_CAP_* bits, presumably) */
	enum pmc_class	pd_class;	/* class of the PMC */
	uint32_t	pd_width;	/* width in bits */
};
578
/*
 * struct pmc_target
 *
 * This structure records all the target processes associated with a
 * PMC.  One node exists per target; the nodes are chained on the
 * 'pm_targets' list of 'struct pmc'.
 */

struct pmc_target {
	LIST_ENTRY(pmc_target)	pt_next;     /* linkage on pm_targets */
	struct pmc_process	*pt_process; /* target descriptor */
};
590
/*
 * struct pmc
 *
 * Describes each allocated PMC.
 *
 * Each PMC has precisely one owner, namely the process that allocated
 * the PMC.
 *
 * A PMC may be attached to multiple target processes.  The
 * 'pm_targets' field links all the target processes being monitored
 * by this PMC.
 *
 * The 'pm_savedvalue' field is protected by a mutex.
 *
 * On a multi-cpu machine, multiple target threads associated with a
 * process-virtual PMC could be concurrently executing on different
 * CPUs.  The 'pm_runcount' field is atomically incremented every time
 * the PMC gets scheduled on a CPU and atomically decremented when it
 * gets descheduled.  Deletion of a PMC is only permitted when this
 * field is '0'.
 *
 */

struct pmc {
	LIST_HEAD(,pmc_target)	pm_targets;	/* list of target processes */
	LIST_ENTRY(pmc)       	pm_next;	/* owner's list */

	/*
	 * System-wide PMCs are allocated on a CPU and are not moved
	 * around.  For system-wide PMCs we record the CPU the PMC was
	 * allocated on in the 'CPU' field of the pmc ID.
	 *
	 * Virtual PMCs run on whichever CPU is currently executing
	 * their targets' threads.  For these PMCs we need to save
	 * their current PMC counter values when they are taken off
	 * CPU.
	 */

	union {
		pmc_value_t	pm_savedvalue;	/* Virtual PMCS */
	} pm_gv;

	/*
	 * For sampling mode PMCs, we keep track of the PMC's "reload
	 * count", which is the counter value to be loaded in when
	 * arming the PMC for the next counting session.  For counting
	 * modes on PMCs that are read-only (e.g., the x86 TSC), we
	 * keep track of the initial value at the start of
	 * counting-mode operation.
	 */

	union {
		pmc_value_t	pm_reloadcount;	/* sampling PMC modes */
		pmc_value_t	pm_initial;	/* counting PMC modes */
	} pm_sc;

	uint32_t	pm_stalled;	/* true for stalled sampling PMCs */
	uint32_t	pm_caps;	/* PMC capabilities */
	enum pmc_event	pm_event;	/* event being measured */
	uint32_t	pm_flags;	/* additional flags PMC_F_... */
	struct pmc_owner *pm_owner;	/* owner thread state */
	uint32_t	pm_runcount;	/* #cpus currently on */
	enum pmc_state	pm_state;	/* current PMC state */

	/*
	 * The PMC ID field encodes the row-index for the PMC, its
	 * mode, class and the CPU# associated with the PMC.  Use the
	 * PMC_TO_*() accessor macros to extract the individual fields.
	 */

	pmc_id_t	pm_id; 		/* allocated PMC id */

	/* md extensions (declared by <machine/pmc_mdep.h>) */
	union pmc_md_pmc	pm_md;
};
665
/*
 * Accessor macros for 'struct pmc'
 *
 * 'P' is a pointer to a 'struct pmc'; each macro extracts one field of
 * its 'pm_id' using the corresponding PMC_ID_TO_*() macro.
 */

#define	PMC_TO_MODE(P)		PMC_ID_TO_MODE((P)->pm_id)
#define	PMC_TO_CLASS(P)		PMC_ID_TO_CLASS((P)->pm_id)
#define	PMC_TO_ROWINDEX(P)	PMC_ID_TO_ROWINDEX((P)->pm_id)
#define	PMC_TO_CPU(P)		PMC_ID_TO_CPU((P)->pm_id)
674
/*
 * struct pmc_process
 *
 * Record a 'target' process being profiled.
 *
 * The target process being profiled could be different from the owner
 * process which allocated the PMCs.  Each target process descriptor
 * is associated with NHWPMC 'struct pmc *' pointers.  Each PMC at a
 * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]'
 * array.  The size of this structure is thus PMC architecture
 * dependent.
 *
 * TODO: Only process-private counting mode PMCs may be attached to a
 * process different from the allocator process (since we do not have
 * the infrastructure to make sense of an interrupted PC value from a
 * 'target' process (yet)).
 *
 */

/* One slot of 'pp_pmcs[]': the PMC in that row and its per-process value. */
struct pmc_targetstate {
	struct pmc	*pp_pmc;   /* target PMC */
	pmc_value_t	pp_pmcval; /* per-process value */
};

struct pmc_process {
	LIST_ENTRY(pmc_process) pp_next;	/* hash chain */
	int		pp_refcnt;		/* reference count */
	uint32_t	pp_flags; 		/* flags PMC_PP_* */
	struct proc	*pp_proc;		/* target process */
	struct pmc_targetstate pp_pmcs[];       /* NHWPMCs */
};

/* Flag for 'pp_flags'. */
#define	PMC_PP_ENABLE_MSR_ACCESS	0x00000001
708
/*
 * struct pmc_owner
 *
 * We associate a PMC with an 'owner' process.
 *
 * A process can be associated with 0..NCPUS*NHWPMC PMCs during its
 * lifetime, where NCPUS is the number of CPUs in the system and
 * NHWPMC is the number of hardware PMCs per CPU.  These are
 * maintained in the list headed by the 'po_pmcs' to save on space.
 *
 * The (o) and (k) tags below refer to the "Locking keys" listed
 * earlier in this file.
 */

struct pmc_owner  {
	LIST_ENTRY(pmc_owner)	po_next;	/* hash chain */
	LIST_ENTRY(pmc_owner)	po_ssnext;	/* list of SS PMC owners */
	LIST_HEAD(, pmc)	po_pmcs;	/* owned PMC list */
	TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */
	struct mtx		po_mtx;		/* spin lock for (o) */
	struct proc		*po_owner;	/* owner proc */
	uint32_t		po_flags;	/* (k) flags PMC_PO_* */
	struct proc		*po_kthread;	/* (k) helper kthread */
	struct pmclog_buffer	*po_curbuf;	/* current log buffer */
	struct file		*po_file;	/* file reference */
	int			po_error;	/* recorded error */
	int			po_sscount;	/* # SS PMCs owned */
};

/* Flags for 'po_flags'. */
#define	PMC_PO_OWNS_LOGFILE		0x00000001 /* has a log file */
#define	PMC_PO_IN_FLUSH			0x00000010 /* in the middle of a flush */
738
/*
 * struct pmc_hw -- describe the state of the PMC hardware
 *
 * When in use, a HW PMC is associated with one allocated 'struct pmc'
 * pointed to by field 'phw_pmc'.  When inactive, this field is NULL.
 *
 * On an SMP box, one or more HW PMC's in process virtual mode with
 * the same 'phw_pmc' could be executing on different CPUs.  In order
 * to handle this case correctly, we need to ensure that only
 * incremental counts get added to the saved value in the associated
 * 'struct pmc'.  The 'phw_save' field is used to keep the saved PMC
 * value at the time the hardware is started during this context
 * switch (i.e., the difference between the new (hardware) count and
 * the saved count is atomically added to the count field in 'struct
 * pmc' at context switch time).
 *
 * NOTE(review): no 'phw_save' field is declared in the structure
 * below; the paragraph above may be stale.
 */

struct pmc_hw {
	uint32_t	phw_state;	/* see PHW_* macros below */
	struct pmc	*phw_pmc;	/* current thread PMC */
};
761
/*
 * Layout of 'phw_state':
 *   bits  0..7   row index
 *   bits  8..15  CPU
 *   bits 16..31  flags
 */
#define	PMC_PHW_RI_MASK		0x000000FF
#define	PMC_PHW_CPU_SHIFT	8
#define	PMC_PHW_CPU_MASK	0x0000FF00
#define	PMC_PHW_FLAGS_SHIFT	16
#define	PMC_PHW_FLAGS_MASK	0xFFFF0000

/* Row index <-> state word. */
#define	PMC_PHW_INDEX_TO_STATE(ri)	((ri) & PMC_PHW_RI_MASK)
#define	PMC_PHW_STATE_TO_INDEX(state)	((state) & PMC_PHW_RI_MASK)

/* CPU number <-> state word. */
#define	PMC_PHW_CPU_TO_STATE(cpu)					\
	(((cpu) << PMC_PHW_CPU_SHIFT) & PMC_PHW_CPU_MASK)
#define	PMC_PHW_STATE_TO_CPU(state)					\
	(((state) & PMC_PHW_CPU_MASK) >> PMC_PHW_CPU_SHIFT)

/* Flag bits <-> state word. */
#define	PMC_PHW_FLAGS_TO_STATE(flags)					\
	(((flags) << PMC_PHW_FLAGS_SHIFT) & PMC_PHW_FLAGS_MASK)
#define	PMC_PHW_STATE_TO_FLAGS(state)					\
	(((state) & PMC_PHW_FLAGS_MASK) >> PMC_PHW_FLAGS_SHIFT)

/* Individual flags, already positioned within the state word. */
#define	PMC_PHW_FLAG_IS_ENABLED		(PMC_PHW_FLAGS_TO_STATE(0x01))
#define	PMC_PHW_FLAG_IS_SHAREABLE	(PMC_PHW_FLAGS_TO_STATE(0x02))
780
/*
 * struct pmc_sample
 *
 * Space for N (tunable) PC samples and associated control data.
 */

/* One sample. */
struct pmc_sample {
	uintfptr_t		ps_pc;		/* PC value at interrupt */
	struct pmc		*ps_pmc;	/* interrupting PMC */
	int			ps_usermode;	/* true for user mode PCs */
	pid_t			ps_pid;		/* process PID or -1 */
};

/*
 * The sample entries plus read and write pointers into them;
 * 'ps_fence' marks the end of the array.
 */
struct pmc_samplebuffer {
	struct pmc_sample * volatile ps_read;	/* read pointer */
	struct pmc_sample * volatile ps_write;	/* write pointer */
	struct pmc_sample	*ps_fence;	/* one beyond ps_samples[] */
	struct pmc_sample	ps_samples[];	/* array of sample entries */
};
800
801
/*
 * struct pmc_cpu
 *
 * A CPU is modelled as a collection of HW PMCs with space for additional
 * flags.
 */

struct pmc_cpu {
	uint32_t	pc_state;	/* physical cpu number + flags */
	struct pmc_samplebuffer *pc_sb; /* space for samples */
	struct pmc_hw	*pc_hwpmcs[];	/* 'npmc' pointers */
	/* other machine dependent fields come here */
};
815
/*
 * Layout of 'pc_state':
 *   bits 0..7   physical cpu number
 *   bits 8..31  flags
 */
#define	PMC_PCPU_CPU_MASK		0x000000FF
#define	PMC_PCPU_FLAGS_MASK		0xFFFFFF00
#define	PMC_PCPU_FLAGS_SHIFT		8

/* State word -> fields. */
#define	PMC_PCPU_STATE_TO_CPU(S)	((S) & PMC_PCPU_CPU_MASK)
#define	PMC_PCPU_STATE_TO_FLAGS(S)					\
	(((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT)

/* Fields -> state word. */
#define	PMC_PCPU_FLAGS_TO_STATE(F)					\
	(((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK)
#define	PMC_PCPU_CPU_TO_STATE(C)	((C) & PMC_PCPU_CPU_MASK)

/* Individual flag, already positioned within the state word. */
#define	PMC_PCPU_FLAG_HTT		(PMC_PCPU_FLAGS_TO_STATE(0x1))
824
/*
 * struct pmc_binding
 *
 * CPU binding information: whether a binding exists and, if it does,
 * the CPU it names.
 */

struct pmc_binding {
	int	pb_bound;	/* is bound? */
	int	pb_cpu;		/* if so, to which CPU */
};
835
/*
 * struct pmc_mdep
 *
 * Machine dependent bits needed per CPU type: a description of the
 * PMC hardware followed by a table of methods.  In the method
 * prototypes '_cpu' is a CPU number and '_ri' a PMC row index; all
 * methods return an int.
 * NOTE(review): the meaning of the int return values is not specified
 * in this header -- confirm against the implementations.
 */

struct pmc_mdep  {
	uint32_t	pmd_cputype;    /* from enum pmc_cputype */
	uint32_t	pmd_npmc;	/* max PMCs per CPU */
	uint32_t	pmd_nclass;	/* # PMC classes supported */
	struct pmc_classinfo  pmd_classes[PMC_CLASS_MAX];
	int		pmd_nclasspmcs[PMC_CLASS_MAX];

	/*
	 * Methods
	 */

	int (*pmd_init)(int _cpu);    /* machine dependent initialization */
	int (*pmd_cleanup)(int _cpu); /* machine dependent cleanup  */

	/* thread context switch in/out */
	int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp);
	int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp);

	/* configuring/reading/writing the hardware PMCs */
	int (*pmd_config_pmc)(int _cpu, int _ri, struct pmc *_pm);
	int (*pmd_get_config)(int _cpu, int _ri, struct pmc **_ppm);
	int (*pmd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value);
	int (*pmd_write_pmc)(int _cpu, int _ri, pmc_value_t _value);

	/* pmc allocation/release */
	int (*pmd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t,
		const struct pmc_op_pmcallocate *_a);
	int (*pmd_release_pmc)(int _cpu, int _ri, struct pmc *_pm);

	/* starting and stopping PMCs */
	int (*pmd_start_pmc)(int _cpu, int _ri);
	int (*pmd_stop_pmc)(int _cpu, int _ri);

	/* handle a PMC interrupt */
	int (*pmd_intr)(int _cpu, uintptr_t _pc, int _usermode);

	/* fill in a 'struct pmc_info' for the PMC at (_cpu, _ri) */
	int (*pmd_describe)(int _cpu, int _ri, struct pmc_info *_pi,
		struct pmc **_ppmc);

	/* look up the machine specific address of the PMC at row '_ri' */
	int (*pmd_get_msr)(int _ri, uint32_t *_msr);

};
884
/*
 * Per-CPU state.  This is an array of 'mp_ncpu' pointers
 * to struct pmc_cpu descriptors.
 */

extern struct pmc_cpu **pmc_pcpu;

/* machine dependent description and methods */
extern struct pmc_mdep *md;

/* driver statistics */
extern struct pmc_op_getdriverstats pmc_stats;
895
896#if	defined(DEBUG) && DEBUG
897
/*
 * Debug flags, one word per major flag group.  PMCDBG(M,N,...) tests
 * bit PMC_DEBUG_MIN_<N> of member pdb_<M>, so the member names must
 * match the major group names used at PMCDBG() call sites.
 */
struct pmc_debugflags {
	int	pdb_CPU;
	int	pdb_CSW;
	int	pdb_LOG;
	int	pdb_MDP;
	int	pdb_MOD;
	int	pdb_OWN;
	int	pdb_PMC;
	int	pdb_PRC;
	int	pdb_SAM;
};

extern struct pmc_debugflags pmc_debugflags;

#define	PMC_DEBUG_STRSIZE		128

/*
 * Initializer with all debugging disabled; it carries one zero for
 * each of the nine members of 'struct pmc_debugflags'.
 */
#define	PMC_DEBUG_DEFAULT_FLAGS		{ 0, 0, 0, 0, 0, 0, 0, 0, 0 }
915
/*
 * PMCDBG(M, N, L, F, ...): conditional debug printf.
 *
 * Prints "<M>:<N>:<L>: " followed by the printf-style format F (at
 * least one argument is required after F) and a newline, but only when
 * bit PMC_DEBUG_MIN_<N> is set in pmc_debugflags.pdb_<M>.  L appears
 * only as text in the message prefix.
 */
#define	PMCDBG(M,N,L,F,...) do {					\
	if ((pmc_debugflags.pdb_ ## M &					\
	    (1 << PMC_DEBUG_MIN_ ## N)) != 0)				\
		printf(#M ":" #N ":" #L  ": " F "\n", __VA_ARGS__);	\
} while (0)
920
/* Major numbers: one per flag group in 'struct pmc_debugflags' */
#define	PMC_DEBUG_MAJ_CPU	0	/* cpu switches */
#define	PMC_DEBUG_MAJ_CSW	1	/* context switches */
#define	PMC_DEBUG_MAJ_LOG	2	/* logging */
#define	PMC_DEBUG_MAJ_MDP	3	/* machine dependent */
#define	PMC_DEBUG_MAJ_MOD	4	/* misc module infrastructure */
#define	PMC_DEBUG_MAJ_OWN	5	/* owner */
#define	PMC_DEBUG_MAJ_PMC	6	/* pmc management */
#define	PMC_DEBUG_MAJ_PRC	7	/* processes */
#define	PMC_DEBUG_MAJ_SAM	8	/* sampling */

/*
 * Minor numbers
 *
 * A minor number is the bit position PMCDBG() tests within a major
 * group's flag word.  Positions 0..7 are common to all groups;
 * positions from 8 upward are reused with a different meaning by
 * each group.
 */

/* Common (8 bits) */
#define	PMC_DEBUG_MIN_ALL	0	/* allocation */
#define	PMC_DEBUG_MIN_REL	1	/* release */
#define	PMC_DEBUG_MIN_OPS	2	/* ops: start, stop, ... */
#define	PMC_DEBUG_MIN_INI	3	/* init */
#define	PMC_DEBUG_MIN_FND	4	/* find */

/* MODULE */
#define	PMC_DEBUG_MIN_PMH	14	/* pmc_hook */
#define	PMC_DEBUG_MIN_PMS	15	/* pmc_syscall */

/* OWN */
#define	PMC_DEBUG_MIN_ORM	8	/* owner remove */
#define	PMC_DEBUG_MIN_OMR	9	/* owner maybe remove */

/* PROCESSES */
#define	PMC_DEBUG_MIN_TLK	8	/* link target */
#define	PMC_DEBUG_MIN_TUL	9	/* unlink target */
#define	PMC_DEBUG_MIN_EXT	10	/* process exit */
#define	PMC_DEBUG_MIN_EXC	11	/* process exec */
#define	PMC_DEBUG_MIN_FRK	12	/* process fork */
#define	PMC_DEBUG_MIN_ATT	13	/* attach/detach */
#define	PMC_DEBUG_MIN_SIG	14	/* signalling */

/* CONTEXT SWITCHES */
#define	PMC_DEBUG_MIN_SWI	8	/* switch in */
#define	PMC_DEBUG_MIN_SWO	9	/* switch out */

/* PMC */
#define	PMC_DEBUG_MIN_REG	8	/* pmc register */
#define	PMC_DEBUG_MIN_ALR	9	/* allocate row */

/* MACHINE DEPENDENT LAYER */
#define	PMC_DEBUG_MIN_REA	8	/* read */
#define	PMC_DEBUG_MIN_WRI	9	/* write */
#define	PMC_DEBUG_MIN_CFG	10	/* config */
#define	PMC_DEBUG_MIN_STA	11	/* start */
#define	PMC_DEBUG_MIN_STO	12	/* stop */
#define	PMC_DEBUG_MIN_INT	13	/* interrupts */

/* CPU */
#define	PMC_DEBUG_MIN_BND	8	/* bind */
#define	PMC_DEBUG_MIN_SEL	9	/* select */

/* LOG */
#define	PMC_DEBUG_MIN_GTB	8	/* get buf */
#define	PMC_DEBUG_MIN_SIO	9	/* schedule i/o */
#define	PMC_DEBUG_MIN_FLS	10	/* flush */
#define	PMC_DEBUG_MIN_SAM	11	/* sample */
983
984#else
/* Debugging not enabled: PMCDBG() invocations expand to nothing. */
#define	PMCDBG(M,N,L,F,...)		/* nothing */
986#endif
987
/* declare a dedicated memory pool */
MALLOC_DECLARE(M_PMC);

/*
 * Functions
 *
 * NOTE(review): only the prototypes are visible here; return value
 * conventions must be confirmed against the implementations.
 */

struct pmc_mdep *pmc_md_initialize(void);	/* MD init function */
/* look up the disposition of row '_ri' (presumably an enum pmc_disp value) */
int	pmc_getrowdisp(int _ri);
/* hand over an interrupt from PMC '_pm' on '_cpu', with the interrupted PC */
int	pmc_process_interrupt(int _cpu, struct pmc *_pm, uintfptr_t _pc,
    int _usermode);
999
1000#endif /* _KERNEL */
1001#endif /* _SYS_PMC_H_ */
1002