1178479Sjb/*
2178479Sjb * CDDL HEADER START
3178479Sjb *
4178479Sjb * The contents of this file are subject to the terms of the
5178479Sjb * Common Development and Distribution License, Version 1.0 only
6178479Sjb * (the "License").  You may not use this file except in compliance
7178479Sjb * with the License.
8178479Sjb *
9178479Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10178479Sjb * or http://www.opensolaris.org/os/licensing.
11178479Sjb * See the License for the specific language governing permissions
12178479Sjb * and limitations under the License.
13178479Sjb *
14178479Sjb * When distributing Covered Code, include this CDDL HEADER in each
15178479Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16178479Sjb * If applicable, add the following below this CDDL HEADER, with the
17178479Sjb * fields enclosed by brackets "[]" replaced with your own identifying
18178479Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
19178479Sjb *
20178479Sjb * CDDL HEADER END
21178479Sjb */
22178479Sjb
23178479Sjb/*
24178479Sjb * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25178479Sjb * Use is subject to license terms.
26178479Sjb */
27178479Sjb
28178479Sjb#pragma ident	"%Z%%M%	%I%	%E% SMI"
29178479Sjb
30178479Sjb#include <dt_impl.h>
31178479Sjb#include <stddef.h>
32178479Sjb#include <errno.h>
33178479Sjb#include <assert.h>
34178479Sjb#include <time.h>
35178479Sjb
36178479Sjbstatic const struct {
37178479Sjb	int dtslt_option;
38178479Sjb	size_t dtslt_offs;
39178479Sjb} _dtrace_sleeptab[] = {
40178479Sjb	{ DTRACEOPT_STATUSRATE, offsetof(dtrace_hdl_t, dt_laststatus) },
41178479Sjb	{ DTRACEOPT_AGGRATE, offsetof(dtrace_hdl_t, dt_lastagg) },
42178479Sjb	{ DTRACEOPT_SWITCHRATE, offsetof(dtrace_hdl_t, dt_lastswitch) },
43178479Sjb	{ DTRACEOPT_MAX, 0 }
44178479Sjb};
45178479Sjb
46178479Sjbvoid
47178479Sjbdtrace_sleep(dtrace_hdl_t *dtp)
48178479Sjb{
49178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
50178479Sjb	dtrace_optval_t policy = dtp->dt_options[DTRACEOPT_BUFPOLICY];
51178479Sjb	dt_proc_notify_t *dprn;
52178479Sjb
53178479Sjb	hrtime_t earliest = INT64_MAX;
54178479Sjb	struct timespec tv;
55178479Sjb	hrtime_t now;
56178479Sjb	int i;
57178479Sjb
58178479Sjb	for (i = 0; _dtrace_sleeptab[i].dtslt_option < DTRACEOPT_MAX; i++) {
59178479Sjb		uintptr_t a = (uintptr_t)dtp + _dtrace_sleeptab[i].dtslt_offs;
60178479Sjb		int opt = _dtrace_sleeptab[i].dtslt_option;
61178479Sjb		dtrace_optval_t interval = dtp->dt_options[opt];
62178479Sjb
63178479Sjb		/*
64178479Sjb		 * If the buffering policy is set to anything other than
65178479Sjb		 * "switch", we ignore the aggrate and switchrate -- they're
66178479Sjb		 * meaningless.
67178479Sjb		 */
68178479Sjb		if (policy != DTRACEOPT_BUFPOLICY_SWITCH &&
69178479Sjb		    _dtrace_sleeptab[i].dtslt_option != DTRACEOPT_STATUSRATE)
70178479Sjb			continue;
71178479Sjb
72178479Sjb		if (*((hrtime_t *)a) + interval < earliest)
73178479Sjb			earliest = *((hrtime_t *)a) + interval;
74178479Sjb	}
75178479Sjb
76178479Sjb	(void) pthread_mutex_lock(&dph->dph_lock);
77178479Sjb
78178479Sjb	now = gethrtime();
79178479Sjb
80178479Sjb	if (earliest < now) {
81178479Sjb		(void) pthread_mutex_unlock(&dph->dph_lock);
82178479Sjb		return; /* sleep duration has already past */
83178479Sjb	}
84178479Sjb
85178551Sjb#if defined(sun)
86178479Sjb	tv.tv_sec = (earliest - now) / NANOSEC;
87178479Sjb	tv.tv_nsec = (earliest - now) % NANOSEC;
88178479Sjb
89178479Sjb	/*
90178479Sjb	 * Wait for either 'tv' nanoseconds to pass or to receive notification
91178479Sjb	 * that a process is in an interesting state.  Regardless of why we
92178479Sjb	 * awaken, iterate over any pending notifications and process them.
93178479Sjb	 */
94178479Sjb	(void) pthread_cond_reltimedwait_np(&dph->dph_cv, &dph->dph_lock, &tv);
95178551Sjb#else
96178551Sjb	earliest -= now;
97178551Sjb	clock_gettime(CLOCK_REALTIME,&tv);
98178551Sjb	tv.tv_sec += earliest / NANOSEC;
99178551Sjb	tv.tv_nsec += earliest % NANOSEC;
100178551Sjb	while (tv.tv_nsec > NANOSEC) {
101178551Sjb		tv.tv_sec += 1;
102178551Sjb		tv.tv_nsec -= NANOSEC;
103178551Sjb	}
104178479Sjb
105178551Sjb	/*
106178551Sjb	 * Wait for either 'tv' nanoseconds to pass or to receive notification
107178551Sjb	 * that a process is in an interesting state.  Regardless of why we
108178551Sjb	 * awaken, iterate over any pending notifications and process them.
109178551Sjb	 */
110178551Sjb	(void) pthread_cond_timedwait(&dph->dph_cv, &dph->dph_lock, &tv);
111178551Sjb#endif
112178551Sjb
113178479Sjb	while ((dprn = dph->dph_notify) != NULL) {
114178479Sjb		if (dtp->dt_prochdlr != NULL) {
115178479Sjb			char *err = dprn->dprn_errmsg;
116178479Sjb			if (*err == '\0')
117178479Sjb				err = NULL;
118178479Sjb
119178479Sjb			dtp->dt_prochdlr(dprn->dprn_dpr->dpr_proc, err,
120178479Sjb			    dtp->dt_procarg);
121178479Sjb		}
122178479Sjb
123178479Sjb		dph->dph_notify = dprn->dprn_next;
124178479Sjb		dt_free(dtp, dprn);
125178479Sjb	}
126178479Sjb
127178479Sjb	(void) pthread_mutex_unlock(&dph->dph_lock);
128178479Sjb}
129178479Sjb
130178479Sjbint
131178479Sjbdtrace_status(dtrace_hdl_t *dtp)
132178479Sjb{
133178479Sjb	int gen = dtp->dt_statusgen;
134178479Sjb	dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_STATUSRATE];
135178479Sjb	hrtime_t now = gethrtime();
136178479Sjb
137178479Sjb	if (!dtp->dt_active)
138178479Sjb		return (DTRACE_STATUS_NONE);
139178479Sjb
140178479Sjb	if (dtp->dt_stopped)
141178479Sjb		return (DTRACE_STATUS_STOPPED);
142178479Sjb
143178479Sjb	if (dtp->dt_laststatus != 0) {
144178479Sjb		if (now - dtp->dt_laststatus < interval)
145178479Sjb			return (DTRACE_STATUS_NONE);
146178479Sjb
147178479Sjb		dtp->dt_laststatus += interval;
148178479Sjb	} else {
149178479Sjb		dtp->dt_laststatus = now;
150178479Sjb	}
151178479Sjb
152178479Sjb	if (dt_ioctl(dtp, DTRACEIOC_STATUS, &dtp->dt_status[gen]) == -1)
153178479Sjb		return (dt_set_errno(dtp, errno));
154178479Sjb
155178479Sjb	dtp->dt_statusgen ^= 1;
156178479Sjb
157178479Sjb	if (dt_handle_status(dtp, &dtp->dt_status[dtp->dt_statusgen],
158178479Sjb	    &dtp->dt_status[gen]) == -1)
159178479Sjb		return (-1);
160178479Sjb
161178479Sjb	if (dtp->dt_status[gen].dtst_exiting) {
162178479Sjb		if (!dtp->dt_stopped)
163178479Sjb			(void) dtrace_stop(dtp);
164178479Sjb
165178479Sjb		return (DTRACE_STATUS_EXITED);
166178479Sjb	}
167178479Sjb
168178479Sjb	if (dtp->dt_status[gen].dtst_filled == 0)
169178479Sjb		return (DTRACE_STATUS_OKAY);
170178479Sjb
171178479Sjb	if (dtp->dt_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
172178479Sjb		return (DTRACE_STATUS_OKAY);
173178479Sjb
174178479Sjb	if (!dtp->dt_stopped) {
175178479Sjb		if (dtrace_stop(dtp) == -1)
176178479Sjb			return (-1);
177178479Sjb	}
178178479Sjb
179178479Sjb	return (DTRACE_STATUS_FILLED);
180178479Sjb}
181178479Sjb
182178479Sjbint
183178479Sjbdtrace_go(dtrace_hdl_t *dtp)
184178479Sjb{
185178551Sjb	dtrace_enable_io_t args;
186178479Sjb	void *dof;
187178479Sjb	int err;
188178479Sjb
189178479Sjb	if (dtp->dt_active)
190178479Sjb		return (dt_set_errno(dtp, EINVAL));
191178479Sjb
192178479Sjb	/*
193178479Sjb	 * If a dtrace:::ERROR program and callback are registered, enable the
194178479Sjb	 * program before we start tracing.  If this fails for a vector open
195178479Sjb	 * with ENOTTY, we permit dtrace_go() to succeed so that vector clients
196178479Sjb	 * such as mdb's dtrace module can execute the rest of dtrace_go() even
197178479Sjb	 * though they do not provide support for the DTRACEIOC_ENABLE ioctl.
198178479Sjb	 */
199178479Sjb	if (dtp->dt_errprog != NULL &&
200178479Sjb	    dtrace_program_exec(dtp, dtp->dt_errprog, NULL) == -1 && (
201178479Sjb	    dtp->dt_errno != ENOTTY || dtp->dt_vector == NULL))
202178479Sjb		return (-1); /* dt_errno has been set for us */
203178479Sjb
204178479Sjb	if ((dof = dtrace_getopt_dof(dtp)) == NULL)
205178479Sjb		return (-1); /* dt_errno has been set for us */
206178479Sjb
207178551Sjb	args.dof = dof;
208178551Sjb	args.n_matched = 0;
209178551Sjb	err = dt_ioctl(dtp, DTRACEIOC_ENABLE, &args);
210178479Sjb	dtrace_dof_destroy(dtp, dof);
211178479Sjb
212178479Sjb	if (err == -1 && (errno != ENOTTY || dtp->dt_vector == NULL))
213178479Sjb		return (dt_set_errno(dtp, errno));
214178479Sjb
215178479Sjb	if (dt_ioctl(dtp, DTRACEIOC_GO, &dtp->dt_beganon) == -1) {
216178479Sjb		if (errno == EACCES)
217178479Sjb			return (dt_set_errno(dtp, EDT_DESTRUCTIVE));
218178479Sjb
219178479Sjb		if (errno == EALREADY)
220178479Sjb			return (dt_set_errno(dtp, EDT_ISANON));
221178479Sjb
222178479Sjb		if (errno == ENOENT)
223178479Sjb			return (dt_set_errno(dtp, EDT_NOANON));
224178479Sjb
225178479Sjb		if (errno == E2BIG)
226178479Sjb			return (dt_set_errno(dtp, EDT_ENDTOOBIG));
227178479Sjb
228178479Sjb		if (errno == ENOSPC)
229178479Sjb			return (dt_set_errno(dtp, EDT_BUFTOOSMALL));
230178479Sjb
231178479Sjb		return (dt_set_errno(dtp, errno));
232178479Sjb	}
233178479Sjb
234178479Sjb	dtp->dt_active = 1;
235178479Sjb
236178479Sjb	if (dt_options_load(dtp) == -1)
237178479Sjb		return (dt_set_errno(dtp, errno));
238178479Sjb
239178479Sjb	return (dt_aggregate_go(dtp));
240178479Sjb}
241178479Sjb
242178479Sjbint
243178479Sjbdtrace_stop(dtrace_hdl_t *dtp)
244178479Sjb{
245178479Sjb	int gen = dtp->dt_statusgen;
246178479Sjb
247178479Sjb	if (dtp->dt_stopped)
248178479Sjb		return (0);
249178479Sjb
250178479Sjb	if (dt_ioctl(dtp, DTRACEIOC_STOP, &dtp->dt_endedon) == -1)
251178479Sjb		return (dt_set_errno(dtp, errno));
252178479Sjb
253178479Sjb	dtp->dt_stopped = 1;
254178479Sjb
255178479Sjb	/*
256178479Sjb	 * Now that we're stopped, we're going to get status one final time.
257178479Sjb	 */
258178479Sjb	if (dt_ioctl(dtp, DTRACEIOC_STATUS, &dtp->dt_status[gen]) == -1)
259178479Sjb		return (dt_set_errno(dtp, errno));
260178479Sjb
261178479Sjb	if (dt_handle_status(dtp, &dtp->dt_status[gen ^ 1],
262178479Sjb	    &dtp->dt_status[gen]) == -1)
263178479Sjb		return (-1);
264178479Sjb
265178479Sjb	return (0);
266178479Sjb}
267178479Sjb
268178479Sjb
269178479Sjbdtrace_workstatus_t
270178479Sjbdtrace_work(dtrace_hdl_t *dtp, FILE *fp,
271178479Sjb    dtrace_consume_probe_f *pfunc, dtrace_consume_rec_f *rfunc, void *arg)
272178479Sjb{
273178479Sjb	int status = dtrace_status(dtp);
274178479Sjb	dtrace_optval_t policy = dtp->dt_options[DTRACEOPT_BUFPOLICY];
275178479Sjb	dtrace_workstatus_t rval;
276178479Sjb
277178479Sjb	switch (status) {
278178479Sjb	case DTRACE_STATUS_EXITED:
279178479Sjb	case DTRACE_STATUS_FILLED:
280178479Sjb	case DTRACE_STATUS_STOPPED:
281178479Sjb		/*
282178479Sjb		 * Tracing is stopped.  We now want to force dtrace_consume()
283178479Sjb		 * and dtrace_aggregate_snap() to proceed, regardless of
284178479Sjb		 * switchrate and aggrate.  We do this by clearing the times.
285178479Sjb		 */
286178479Sjb		dtp->dt_lastswitch = 0;
287178479Sjb		dtp->dt_lastagg = 0;
288178479Sjb		rval = DTRACE_WORKSTATUS_DONE;
289178479Sjb		break;
290178479Sjb
291178479Sjb	case DTRACE_STATUS_NONE:
292178479Sjb	case DTRACE_STATUS_OKAY:
293178479Sjb		rval = DTRACE_WORKSTATUS_OKAY;
294178479Sjb		break;
295178479Sjb
296178479Sjb	case -1:
297178479Sjb		return (DTRACE_WORKSTATUS_ERROR);
298178479Sjb	}
299178479Sjb
300178479Sjb	if ((status == DTRACE_STATUS_NONE || status == DTRACE_STATUS_OKAY) &&
301178479Sjb	    policy != DTRACEOPT_BUFPOLICY_SWITCH) {
302178479Sjb		/*
303178479Sjb		 * There either isn't any status or things are fine -- and
304178479Sjb		 * this is a "ring" or "fill" buffer.  We don't want to consume
305178479Sjb		 * any of the trace data or snapshot the aggregations; we just
306178479Sjb		 * return.
307178479Sjb		 */
308178479Sjb		assert(rval == DTRACE_WORKSTATUS_OKAY);
309178479Sjb		return (rval);
310178479Sjb	}
311178479Sjb
312178479Sjb	if (dtrace_aggregate_snap(dtp) == -1)
313178479Sjb		return (DTRACE_WORKSTATUS_ERROR);
314178479Sjb
315178479Sjb	if (dtrace_consume(dtp, fp, pfunc, rfunc, arg) == -1)
316178479Sjb		return (DTRACE_WORKSTATUS_ERROR);
317178479Sjb
318178479Sjb	return (rval);
319178479Sjb}
320