1179237Sjb/*
2179237Sjb * CDDL HEADER START
3179237Sjb *
4179237Sjb * The contents of this file are subject to the terms of the
5179237Sjb * Common Development and Distribution License (the "License").
6179237Sjb * You may not use this file except in compliance with the License.
7179237Sjb *
8179237Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9179237Sjb * or http://www.opensolaris.org/os/licensing.
10179237Sjb * See the License for the specific language governing permissions
11179237Sjb * and limitations under the License.
12179237Sjb *
13179237Sjb * When distributing Covered Code, include this CDDL HEADER in each
14179237Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15179237Sjb * If applicable, add the following below this CDDL HEADER, with the
16179237Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17179237Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18179237Sjb *
19179237Sjb * CDDL HEADER END
20179237Sjb *
21179237Sjb * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22179237Sjb *
23179237Sjb * $FreeBSD: releng/10.3/sys/cddl/dev/profile/profile.c 282748 2015-05-11 07:54:39Z avg $
24179237Sjb *
25179237Sjb */
26179237Sjb
27179237Sjb/*
28179237Sjb * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29179237Sjb * Use is subject to license terms.
30179237Sjb */
31179237Sjb
32179237Sjb#include <sys/cdefs.h>
33179237Sjb#include <sys/param.h>
34179237Sjb#include <sys/systm.h>
35179237Sjb#include <sys/conf.h>
36179237Sjb#include <sys/cpuvar.h>
37179237Sjb#include <sys/fcntl.h>
38179237Sjb#include <sys/filio.h>
39179237Sjb#include <sys/kdb.h>
40179237Sjb#include <sys/kernel.h>
41179237Sjb#include <sys/kmem.h>
42179237Sjb#include <sys/kthread.h>
43179237Sjb#include <sys/limits.h>
44179237Sjb#include <sys/linker.h>
45179237Sjb#include <sys/lock.h>
46179237Sjb#include <sys/malloc.h>
47179237Sjb#include <sys/module.h>
48179237Sjb#include <sys/mutex.h>
49179237Sjb#include <sys/poll.h>
50179237Sjb#include <sys/proc.h>
51179237Sjb#include <sys/selinfo.h>
52179237Sjb#include <sys/smp.h>
53179237Sjb#include <sys/uio.h>
54179237Sjb#include <sys/unistd.h>
55282748Savg#include <machine/cpu.h>
56179237Sjb#include <machine/stdarg.h>
57179237Sjb
58179237Sjb#include <sys/dtrace.h>
59179237Sjb#include <sys/dtrace_bsd.h>
60179237Sjb
/* Size of the prof_name buffer in profile_probe_t. */
#define	PROF_NAMELEN		15

/*
 * Probe kinds stored in prof_kind, and the probe-name prefix that
 * profile_provide() maps to each kind.
 */
#define	PROF_PROFILE		0
#define	PROF_TICK		1
#define	PROF_PREFIX_PROFILE	"profile-"
#define	PROF_PREFIX_TICK	"tick-"
67179237Sjb
68179237Sjb/*
69179237Sjb * Regardless of platform, there are five artificial frames in the case of the
70179237Sjb * profile provider:
71179237Sjb *
72179237Sjb *	profile_fire
73179237Sjb *	cyclic_expire
74179237Sjb *	cyclic_fire
75179237Sjb *	[ cbe ]
76179237Sjb *	[ locore ]
77179237Sjb *
78179237Sjb * On amd64, there are two frames associated with locore:  one in locore, and
79179237Sjb * another in common interrupt dispatch code.  (i386 has not been modified to
80179237Sjb * use this common layer.)  Further, on i386, the interrupted instruction
81179237Sjb * appears as its own stack frame.  All of this means that we need to add one
82179237Sjb * frame for amd64, and then take one away for both amd64 and i386.
83179237Sjb *
84179237Sjb * On SPARC, the picture is further complicated because the compiler
85179237Sjb * optimizes away tail-calls -- so the following frames are optimized away:
86179237Sjb *
87179237Sjb * 	profile_fire
88179237Sjb *	cyclic_expire
89179237Sjb *
90179237Sjb * This gives three frames.  However, on DEBUG kernels, the cyclic_expire
91179237Sjb * frame cannot be tail-call eliminated, yielding four frames in this case.
92179237Sjb *
 * All of the above constraints lead to the mess below.  Yes, the profile
 * provider should ideally figure this out on-the-fly by hitting one of its own
 * probes and then walking its own stack trace.  This is complicated, however,
 * and the static definition doesn't seem to be overly brittle.  Still, we
 * allow for a manual override in case we get it completely wrong.
98179237Sjb */
/*
 * Default number of artificial frames that profile_create() hands to
 * dtrace_probe_create(); a non-zero profile_aframes takes precedence.
 * NOTE(review): no value is defined for architectures other than the ones
 * listed below -- confirm that is intended.
 */
#ifdef __amd64
#define	PROF_ARTIFICIAL_FRAMES	10
#else
#ifdef __i386
#define	PROF_ARTIFICIAL_FRAMES	6
#else
#ifdef __sparc
#ifdef DEBUG
#define	PROF_ARTIFICIAL_FRAMES	4
#else
#define	PROF_ARTIFICIAL_FRAMES	3
#endif
#endif
#endif
#endif

#ifdef __mips
/*
 * This value is bogus just to make module compilable on mips
 */
#define	PROF_ARTIFICIAL_FRAMES	3
#endif

#ifdef __powerpc__
/*
 * This value is bogus just to make module compilable on powerpc
 */
#define	PROF_ARTIFICIAL_FRAMES	3
#endif
128242723Sjhibbits
/* Declared early so profile_probe can point at its per-CPU state. */
struct profile_probe_percpu;

/*
 * State for one profile-* or tick-* probe.  Allocated by profile_create()
 * and released by profile_destroy().
 */
typedef struct profile_probe {
	char		prof_name[PROF_NAMELEN];	/* copy of the probe name */
	dtrace_id_t	prof_id;	/* id returned by dtrace_probe_create() */
	int		prof_kind;	/* PROF_PROFILE or PROF_TICK */
#ifdef illumos
	hrtime_t	prof_interval;	/* interval as given to profile_create() */
	cyclic_id_t	prof_cyclic;	/* CYCLIC_NONE while the probe is disabled */
#else
	sbintime_t	prof_interval;	/* interval converted by nsec_to_sbt() */
	struct callout	prof_cyclic;	/* callout driving a tick probe */
	sbintime_t	prof_expected;	/* absolute time of the next tick firing */
	/* per-CPU state of an enabled profile probe; NULL while disabled */
	struct profile_probe_percpu **prof_pcpus;
#endif
} profile_probe_t;
145179237Sjb
/*
 * Per-CPU state of an enabled profile-* probe; this is the argument passed
 * to profile_fire().  In the non-illumos code the two time fields are
 * assigned sbintime_t values (see profile_enable_omni()).
 */
typedef struct profile_probe_percpu {
	hrtime_t	profc_expected;	/* absolute time the next firing is due */
	hrtime_t	profc_interval;	/* distance between firings */
	profile_probe_t	*profc_probe;	/* owning probe */
#ifdef __FreeBSD__
	struct callout	profc_cyclic;	/* callout that runs profile_fire() */
#endif
} profile_probe_percpu_t;
154179237Sjb
/* Forward declarations for functions referenced before their definitions. */
static d_open_t	profile_open;
static int	profile_unload(void);
static void	profile_create(hrtime_t, char *, int);
static void	profile_destroy(void *, dtrace_id_t, void *);
static void	profile_enable(void *, dtrace_id_t, void *);
static void	profile_disable(void *, dtrace_id_t, void *);
static void	profile_load(void *);
static void	profile_provide(void *, dtrace_probedesc_t *);
163179237Sjb
/*
 * Frequencies (per second) of the profile-<N> probes that profile_provide()
 * creates when called without a probe description.  Zero entries are
 * skipped.
 */
static int profile_rates[] = {
    97, 199, 499, 997, 1999,
    4001, 4999, 0, 0, 0,
    0, 0, 0, 0, 0,
    0, 0, 0, 0, 0
};

/*
 * Frequencies (per second) of the tick-<N> probes that profile_provide()
 * creates when called without a probe description.  Zero entries are
 * skipped.
 */
static int profile_ticks[] = {
    1, 10, 100, 500, 1000,
    5000, 0, 0, 0, 0,
    0, 0, 0, 0, 0
};
176179237Sjb
/*
 * profile_max defines the upper bound on the number of profile probes that
 * can exist (this is to prevent malicious or clumsy users from exhausting
 * system resources by creating a slew of profile probes). At mod load time,
 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
 * present in the profile.conf file.
 */
#define	PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */
static uint32_t profile_max = PROFILE_MAX_DEFAULT;
					/* maximum number of profile probes */
static uint32_t profile_total;		/* current number of profile probes */
188179237Sjb
/*
 * Character device switch used by profile_load() to create the
 * "dtrace/profile" device node; only the open entry point is supplied.
 */
static struct cdevsw profile_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= profile_open,
	.d_name		= "profile",
};

/* Stability attributes passed to dtrace_register() by profile_load(). */
static dtrace_pattr_t profile_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};

/*
 * Provider operations passed to dtrace_register().  Only the provide,
 * enable, disable and destroy callbacks are supplied; every other slot is
 * left NULL.
 */
static dtrace_pops_t profile_pops = {
	profile_provide,
	NULL,
	profile_enable,
	profile_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	profile_destroy
};
215179237Sjb
/* Device node created by profile_load() and destroyed by profile_unload(). */
static struct cdev		*profile_cdev;
/* Provider id filled in by dtrace_register(). */
static dtrace_provider_id_t	profile_id;
/* Shortest interval (in nanoseconds) that profile_create() accepts. */
static hrtime_t			profile_interval_min = NANOSEC / 5000;	/* 5000 hz */
/* When non-zero, used by profile_create() instead of PROF_ARTIFICIAL_FRAMES. */
static int			profile_aframes = 0;			/* override */
220179237Sjb
221282748Savgstatic sbintime_t
222282748Savgnsec_to_sbt(hrtime_t nsec)
223282748Savg{
224282748Savg	time_t sec;
225282748Savg
226282748Savg	/*
227282748Savg	 * We need to calculate nsec * 2^32 / 10^9
228282748Savg	 * Seconds and nanoseconds are split to avoid overflow.
229282748Savg	 */
230282748Savg	sec = nsec / NANOSEC;
231282748Savg	nsec = nsec % NANOSEC;
232282748Savg	return (((sbintime_t)sec << 32) | ((sbintime_t)nsec << 32) / NANOSEC);
233282748Savg}
234282748Savg
235282748Savgstatic hrtime_t
236282748Savgsbt_to_nsec(sbintime_t sbt)
237282748Savg{
238282748Savg
239282748Savg	return ((sbt >> 32) * NANOSEC +
240282748Savg	    (((uint32_t)sbt * (hrtime_t)NANOSEC) >> 32));
241282748Savg}
242282748Savg
243179237Sjbstatic void
244179237Sjbprofile_fire(void *arg)
245179237Sjb{
246179237Sjb	profile_probe_percpu_t *pcpu = arg;
247179237Sjb	profile_probe_t *prof = pcpu->profc_probe;
248179237Sjb	hrtime_t late;
249282748Savg	struct trapframe *frame;
250282748Savg	uintfptr_t pc, upc;
251179237Sjb
252282748Savg#ifdef illumos
253179237Sjb	late = gethrtime() - pcpu->profc_expected;
254282748Savg#else
255282748Savg	late = sbt_to_nsec(sbinuptime() - pcpu->profc_expected);
256282748Savg#endif
257282748Savg
258282748Savg	pc = 0;
259282748Savg	upc = 0;
260282748Savg
261282748Savg	/*
262282748Savg	 * td_intr_frame can be unset if this is a catch up event
263282748Savg	 * after waking up from idle sleep.
264282748Savg	 * This can only happen on a CPU idle thread.
265282748Savg	 */
266282748Savg	frame = curthread->td_intr_frame;
267282748Savg	if (frame != NULL) {
268282748Savg		if (TRAPF_USERMODE(frame))
269282748Savg			upc = TRAPF_PC(frame);
270282748Savg		else
271282748Savg			pc = TRAPF_PC(frame);
272282748Savg	}
273282748Savg	dtrace_probe(prof->prof_id, pc, upc, late, 0, 0);
274282748Savg
275179237Sjb	pcpu->profc_expected += pcpu->profc_interval;
276282748Savg	callout_schedule_sbt_curcpu(&pcpu->profc_cyclic,
277282748Savg	    pcpu->profc_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE);
278179237Sjb}
279179237Sjb
280179237Sjbstatic void
281179237Sjbprofile_tick(void *arg)
282179237Sjb{
283179237Sjb	profile_probe_t *prof = arg;
284282748Savg	struct trapframe *frame;
285282748Savg	uintfptr_t pc, upc;
286179237Sjb
287282748Savg	pc = 0;
288282748Savg	upc = 0;
289282748Savg
290282748Savg	/*
291282748Savg	 * td_intr_frame can be unset if this is a catch up event
292282748Savg	 * after waking up from idle sleep.
293282748Savg	 * This can only happen on a CPU idle thread.
294282748Savg	 */
295282748Savg	frame = curthread->td_intr_frame;
296282748Savg	if (frame != NULL) {
297282748Savg		if (TRAPF_USERMODE(frame))
298282748Savg			upc = TRAPF_PC(frame);
299282748Savg		else
300282748Savg			pc = TRAPF_PC(frame);
301282748Savg	}
302282748Savg	dtrace_probe(prof->prof_id, pc, upc, 0, 0, 0);
303282748Savg
304282748Savg	prof->prof_expected += prof->prof_interval;
305282748Savg	callout_schedule_sbt(&prof->prof_cyclic,
306282748Savg	    prof->prof_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE);
307179237Sjb}
308179237Sjb
309179237Sjbstatic void
310179237Sjbprofile_create(hrtime_t interval, char *name, int kind)
311179237Sjb{
312179237Sjb	profile_probe_t *prof;
313179237Sjb
314179237Sjb	if (interval < profile_interval_min)
315179237Sjb		return;
316179237Sjb
317179237Sjb	if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
318179237Sjb		return;
319179237Sjb
320179237Sjb	atomic_add_32(&profile_total, 1);
321179237Sjb	if (profile_total > profile_max) {
322179237Sjb		atomic_add_32(&profile_total, -1);
323179237Sjb		return;
324179237Sjb	}
325179237Sjb
326179237Sjb	prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
327179237Sjb	(void) strcpy(prof->prof_name, name);
328282748Savg#ifdef illumos
329179237Sjb	prof->prof_interval = interval;
330179237Sjb	prof->prof_cyclic = CYCLIC_NONE;
331282748Savg#else
332282748Savg	prof->prof_interval = nsec_to_sbt(interval);
333282748Savg	callout_init(&prof->prof_cyclic, CALLOUT_MPSAFE);
334282748Savg#endif
335179237Sjb	prof->prof_kind = kind;
336179237Sjb	prof->prof_id = dtrace_probe_create(profile_id,
337179237Sjb	    NULL, NULL, name,
338179237Sjb	    profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof);
339179237Sjb}
340179237Sjb
341179237Sjb/*ARGSUSED*/
342179237Sjbstatic void
343179237Sjbprofile_provide(void *arg, dtrace_probedesc_t *desc)
344179237Sjb{
345179237Sjb	int i, j, rate, kind;
346179237Sjb	hrtime_t val = 0, mult = 1, len = 0;
347179237Sjb	char *name, *suffix = NULL;
348179237Sjb
349179237Sjb	const struct {
350179237Sjb		char *prefix;
351179237Sjb		int kind;
352179237Sjb	} types[] = {
353179237Sjb		{ PROF_PREFIX_PROFILE, PROF_PROFILE },
354179237Sjb		{ PROF_PREFIX_TICK, PROF_TICK },
355179237Sjb		{ 0, 0 }
356179237Sjb	};
357179237Sjb
358179237Sjb	const struct {
359179237Sjb		char *name;
360179237Sjb		hrtime_t mult;
361179237Sjb	} suffixes[] = {
362179237Sjb		{ "ns", 	NANOSEC / NANOSEC },
363179237Sjb		{ "nsec",	NANOSEC / NANOSEC },
364179237Sjb		{ "us",		NANOSEC / MICROSEC },
365179237Sjb		{ "usec",	NANOSEC / MICROSEC },
366179237Sjb		{ "ms",		NANOSEC / MILLISEC },
367179237Sjb		{ "msec",	NANOSEC / MILLISEC },
368179237Sjb		{ "s",		NANOSEC / SEC },
369179237Sjb		{ "sec",	NANOSEC / SEC },
370179237Sjb		{ "m",		NANOSEC * (hrtime_t)60 },
371179237Sjb		{ "min",	NANOSEC * (hrtime_t)60 },
372179237Sjb		{ "h",		NANOSEC * (hrtime_t)(60 * 60) },
373179237Sjb		{ "hour",	NANOSEC * (hrtime_t)(60 * 60) },
374179237Sjb		{ "d",		NANOSEC * (hrtime_t)(24 * 60 * 60) },
375179237Sjb		{ "day",	NANOSEC * (hrtime_t)(24 * 60 * 60) },
376179237Sjb		{ "hz",		0 },
377179237Sjb		{ NULL }
378179237Sjb	};
379179237Sjb
380179237Sjb	if (desc == NULL) {
381179237Sjb		char n[PROF_NAMELEN];
382179237Sjb
383179237Sjb		/*
384179237Sjb		 * If no description was provided, provide all of our probes.
385179237Sjb		 */
386179237Sjb		for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
387179237Sjb			if ((rate = profile_rates[i]) == 0)
388179237Sjb				continue;
389179237Sjb
390179237Sjb			(void) snprintf(n, PROF_NAMELEN, "%s%d",
391179237Sjb			    PROF_PREFIX_PROFILE, rate);
392179237Sjb			profile_create(NANOSEC / rate, n, PROF_PROFILE);
393179237Sjb		}
394179237Sjb
395179237Sjb		for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
396179237Sjb			if ((rate = profile_ticks[i]) == 0)
397179237Sjb				continue;
398179237Sjb
399179237Sjb			(void) snprintf(n, PROF_NAMELEN, "%s%d",
400179237Sjb			    PROF_PREFIX_TICK, rate);
401179237Sjb			profile_create(NANOSEC / rate, n, PROF_TICK);
402179237Sjb		}
403179237Sjb
404179237Sjb		return;
405179237Sjb	}
406179237Sjb
407179237Sjb	name = desc->dtpd_name;
408179237Sjb
409179237Sjb	for (i = 0; types[i].prefix != NULL; i++) {
410179237Sjb		len = strlen(types[i].prefix);
411179237Sjb
412179237Sjb		if (strncmp(name, types[i].prefix, len) != 0)
413179237Sjb			continue;
414179237Sjb		break;
415179237Sjb	}
416179237Sjb
417179237Sjb	if (types[i].prefix == NULL)
418179237Sjb		return;
419179237Sjb
420179237Sjb	kind = types[i].kind;
421179237Sjb	j = strlen(name) - len;
422179237Sjb
423179237Sjb	/*
424179237Sjb	 * We need to start before any time suffix.
425179237Sjb	 */
426179237Sjb	for (j = strlen(name); j >= len; j--) {
427179237Sjb		if (name[j] >= '0' && name[j] <= '9')
428179237Sjb			break;
429179237Sjb		suffix = &name[j];
430179237Sjb	}
431179237Sjb
432179237Sjb	ASSERT(suffix != NULL);
433179237Sjb
434179237Sjb	/*
435179237Sjb	 * Now determine the numerical value present in the probe name.
436179237Sjb	 */
437179237Sjb	for (; j >= len; j--) {
438179237Sjb		if (name[j] < '0' || name[j] > '9')
439179237Sjb			return;
440179237Sjb
441179237Sjb		val += (name[j] - '0') * mult;
442179237Sjb		mult *= (hrtime_t)10;
443179237Sjb	}
444179237Sjb
445179237Sjb	if (val == 0)
446179237Sjb		return;
447179237Sjb
448179237Sjb	/*
449179237Sjb	 * Look-up the suffix to determine the multiplier.
450179237Sjb	 */
451179237Sjb	for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
452179237Sjb		if (strcasecmp(suffixes[i].name, suffix) == 0) {
453179237Sjb			mult = suffixes[i].mult;
454179237Sjb			break;
455179237Sjb		}
456179237Sjb	}
457179237Sjb
458179237Sjb	if (suffixes[i].name == NULL && *suffix != '\0')
459179237Sjb		return;
460179237Sjb
461179237Sjb	if (mult == 0) {
462179237Sjb		/*
463179237Sjb		 * The default is frequency-per-second.
464179237Sjb		 */
465179237Sjb		val = NANOSEC / val;
466179237Sjb	} else {
467179237Sjb		val *= mult;
468179237Sjb	}
469179237Sjb
470179237Sjb	profile_create(val, name, kind);
471179237Sjb}
472179237Sjb
473179237Sjb/* ARGSUSED */
474179237Sjbstatic void
475179237Sjbprofile_destroy(void *arg, dtrace_id_t id, void *parg)
476179237Sjb{
477179237Sjb	profile_probe_t *prof = parg;
478179237Sjb
479282748Savg#ifdef illumos
480179237Sjb	ASSERT(prof->prof_cyclic == CYCLIC_NONE);
481282748Savg#else
482282748Savg	ASSERT(!callout_active(&prof->prof_cyclic) && prof->prof_pcpus == NULL);
483282748Savg#endif
484179237Sjb	kmem_free(prof, sizeof (profile_probe_t));
485179237Sjb
486179237Sjb	ASSERT(profile_total >= 1);
487179237Sjb	atomic_add_32(&profile_total, -1);
488179237Sjb}
489179237Sjb
490282748Savg#ifdef illumos
491179237Sjb/*ARGSUSED*/
492179237Sjbstatic void
493179237Sjbprofile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
494179237Sjb{
495179237Sjb	profile_probe_t *prof = arg;
496179237Sjb	profile_probe_percpu_t *pcpu;
497179237Sjb
498179237Sjb	pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
499179237Sjb	pcpu->profc_probe = prof;
500179237Sjb
501179237Sjb	hdlr->cyh_func = profile_fire;
502179237Sjb	hdlr->cyh_arg = pcpu;
503179237Sjb
504179237Sjb	when->cyt_interval = prof->prof_interval;
505179237Sjb	when->cyt_when = gethrtime() + when->cyt_interval;
506179237Sjb
507179237Sjb	pcpu->profc_expected = when->cyt_when;
508179237Sjb	pcpu->profc_interval = when->cyt_interval;
509179237Sjb}
510179237Sjb
511179237Sjb/*ARGSUSED*/
512179237Sjbstatic void
513179237Sjbprofile_offline(void *arg, cpu_t *cpu, void *oarg)
514179237Sjb{
515179237Sjb	profile_probe_percpu_t *pcpu = oarg;
516179237Sjb
517179237Sjb	ASSERT(pcpu->profc_probe == arg);
518179237Sjb	kmem_free(pcpu, sizeof (profile_probe_percpu_t));
519179237Sjb}
520179237Sjb
521179237Sjb/* ARGSUSED */
522179237Sjbstatic void
523179237Sjbprofile_enable(void *arg, dtrace_id_t id, void *parg)
524179237Sjb{
525179237Sjb	profile_probe_t *prof = parg;
526179237Sjb	cyc_omni_handler_t omni;
527179237Sjb	cyc_handler_t hdlr;
528179237Sjb	cyc_time_t when;
529179237Sjb
530179237Sjb	ASSERT(prof->prof_interval != 0);
531179237Sjb	ASSERT(MUTEX_HELD(&cpu_lock));
532179237Sjb
533179237Sjb	if (prof->prof_kind == PROF_TICK) {
534179237Sjb		hdlr.cyh_func = profile_tick;
535179237Sjb		hdlr.cyh_arg = prof;
536179237Sjb
537179237Sjb		when.cyt_interval = prof->prof_interval;
538179237Sjb		when.cyt_when = gethrtime() + when.cyt_interval;
539179237Sjb	} else {
540179237Sjb		ASSERT(prof->prof_kind == PROF_PROFILE);
541179237Sjb		omni.cyo_online = profile_online;
542179237Sjb		omni.cyo_offline = profile_offline;
543179237Sjb		omni.cyo_arg = prof;
544179237Sjb	}
545179237Sjb
546179237Sjb	if (prof->prof_kind == PROF_TICK) {
547179237Sjb		prof->prof_cyclic = cyclic_add(&hdlr, &when);
548179237Sjb	} else {
549179237Sjb		prof->prof_cyclic = cyclic_add_omni(&omni);
550179237Sjb	}
551179237Sjb}
552179237Sjb
553179237Sjb/* ARGSUSED */
554179237Sjbstatic void
555179237Sjbprofile_disable(void *arg, dtrace_id_t id, void *parg)
556179237Sjb{
557179237Sjb	profile_probe_t *prof = parg;
558179237Sjb
559179237Sjb	ASSERT(prof->prof_cyclic != CYCLIC_NONE);
560179237Sjb	ASSERT(MUTEX_HELD(&cpu_lock));
561179237Sjb
562179237Sjb	cyclic_remove(prof->prof_cyclic);
563179237Sjb	prof->prof_cyclic = CYCLIC_NONE;
564179237Sjb}
565179237Sjb
566282748Savg#else
567282748Savg
568179237Sjbstatic void
569282748Savgprofile_enable_omni(profile_probe_t *prof)
570282748Savg{
571282748Savg	profile_probe_percpu_t *pcpu;
572282748Savg	int cpu;
573282748Savg
574282748Savg	prof->prof_pcpus = kmem_zalloc((mp_maxid + 1) * sizeof(pcpu), KM_SLEEP);
575282748Savg	CPU_FOREACH(cpu) {
576282748Savg		pcpu = kmem_zalloc(sizeof(profile_probe_percpu_t), KM_SLEEP);
577282748Savg		prof->prof_pcpus[cpu] = pcpu;
578282748Savg		pcpu->profc_probe = prof;
579282748Savg		pcpu->profc_expected = sbinuptime() + prof->prof_interval;
580282748Savg		pcpu->profc_interval = prof->prof_interval;
581282748Savg		callout_init(&pcpu->profc_cyclic, CALLOUT_MPSAFE);
582282748Savg		callout_reset_sbt_on(&pcpu->profc_cyclic,
583282748Savg		    pcpu->profc_expected, 0, profile_fire, pcpu,
584282748Savg		    cpu, C_DIRECT_EXEC | C_ABSOLUTE);
585282748Savg	}
586282748Savg}
587282748Savg
588282748Savgstatic void
589282748Savgprofile_disable_omni(profile_probe_t *prof)
590282748Savg{
591282748Savg	profile_probe_percpu_t *pcpu;
592282748Savg	int cpu;
593282748Savg
594282748Savg	ASSERT(prof->prof_pcpus != NULL);
595282748Savg	CPU_FOREACH(cpu) {
596282748Savg		pcpu = prof->prof_pcpus[cpu];
597282748Savg		ASSERT(pcpu->profc_probe == prof);
598282748Savg		ASSERT(callout_active(&pcpu->profc_cyclic));
599282748Savg		callout_stop(&pcpu->profc_cyclic);
600282748Savg		callout_drain(&pcpu->profc_cyclic);
601282748Savg		kmem_free(pcpu, sizeof(profile_probe_percpu_t));
602282748Savg	}
603282748Savg	kmem_free(prof->prof_pcpus, (mp_maxid + 1) * sizeof(pcpu));
604282748Savg	prof->prof_pcpus = NULL;
605282748Savg}
606282748Savg
607282748Savg/* ARGSUSED */
608282748Savgstatic void
609282748Savgprofile_enable(void *arg, dtrace_id_t id, void *parg)
610282748Savg{
611282748Savg	profile_probe_t *prof = parg;
612282748Savg
613282748Savg	if (prof->prof_kind == PROF_TICK) {
614282748Savg		prof->prof_expected = sbinuptime() + prof->prof_interval;
615282748Savg		callout_reset_sbt(&prof->prof_cyclic,
616282748Savg		    prof->prof_expected, 0, profile_tick, prof,
617282748Savg		    C_DIRECT_EXEC | C_ABSOLUTE);
618282748Savg	} else {
619282748Savg		ASSERT(prof->prof_kind == PROF_PROFILE);
620282748Savg		profile_enable_omni(prof);
621282748Savg	}
622282748Savg}
623282748Savg
624282748Savg/* ARGSUSED */
625282748Savgstatic void
626282748Savgprofile_disable(void *arg, dtrace_id_t id, void *parg)
627282748Savg{
628282748Savg	profile_probe_t *prof = parg;
629282748Savg
630282748Savg	if (prof->prof_kind == PROF_TICK) {
631282748Savg		ASSERT(callout_active(&prof->prof_cyclic));
632282748Savg		callout_stop(&prof->prof_cyclic);
633282748Savg		callout_drain(&prof->prof_cyclic);
634282748Savg	} else {
635282748Savg		ASSERT(prof->prof_kind == PROF_PROFILE);
636282748Savg		profile_disable_omni(prof);
637282748Savg	}
638282748Savg}
639282748Savg#endif
640282748Savg
641282748Savgstatic void
642179237Sjbprofile_load(void *dummy)
643179237Sjb{
644179237Sjb	/* Create the /dev/dtrace/profile entry. */
645179237Sjb	profile_cdev = make_dev(&profile_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
646179237Sjb	    "dtrace/profile");
647179237Sjb
648179237Sjb	if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER,
649179237Sjb	    NULL, &profile_pops, NULL, &profile_id) != 0)
650179237Sjb		return;
651179237Sjb}
652179237Sjb
653179237Sjb
654179237Sjbstatic int
655179237Sjbprofile_unload()
656179237Sjb{
657179237Sjb	int error = 0;
658179237Sjb
659179237Sjb	if ((error = dtrace_unregister(profile_id)) != 0)
660179237Sjb		return (error);
661179237Sjb
662179237Sjb	destroy_dev(profile_cdev);
663179237Sjb
664179237Sjb	return (error);
665179237Sjb}
666179237Sjb
667179237Sjb/* ARGSUSED */
668179237Sjbstatic int
669179237Sjbprofile_modevent(module_t mod __unused, int type, void *data __unused)
670179237Sjb{
671179237Sjb	int error = 0;
672179237Sjb
673179237Sjb	switch (type) {
674179237Sjb	case MOD_LOAD:
675179237Sjb		break;
676179237Sjb
677179237Sjb	case MOD_UNLOAD:
678179237Sjb		break;
679179237Sjb
680179237Sjb	case MOD_SHUTDOWN:
681179237Sjb		break;
682179237Sjb
683179237Sjb	default:
684179237Sjb		error = EOPNOTSUPP;
685179237Sjb		break;
686179237Sjb
687179237Sjb	}
688179237Sjb	return (error);
689179237Sjb}
690179237Sjb
691179237Sjb/* ARGSUSED */
692179237Sjbstatic int
693179237Sjbprofile_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
694179237Sjb{
695179237Sjb	return (0);
696179237Sjb}
697179237Sjb
/*
 * Provider set-up and tear-down run from SYSINIT/SYSUNINIT hooks; the module
 * event handler itself does no work.
 */
SYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL);
SYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL);

/* Module declaration, version, and dependencies on dtrace and opensolaris. */
DEV_MODULE(profile, profile_modevent, NULL);
MODULE_VERSION(profile, 1);
MODULE_DEPEND(profile, dtrace, 1, 1, 1);
MODULE_DEPEND(profile, opensolaris, 1, 1, 1);
705