1209720Srpaulo/*
2209720Srpaulo * CDDL HEADER START
3209720Srpaulo *
4209720Srpaulo * The contents of this file are subject to the terms of the
5209720Srpaulo * Common Development and Distribution License (the "License").
6209720Srpaulo * You may not use this file except in compliance with the License.
7209720Srpaulo *
8209720Srpaulo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9209720Srpaulo * or http://www.opensolaris.org/os/licensing.
10209720Srpaulo * See the License for the specific language governing permissions
11209720Srpaulo * and limitations under the License.
12209720Srpaulo *
13209720Srpaulo * When distributing Covered Code, include this CDDL HEADER in each
14209720Srpaulo * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15209720Srpaulo * If applicable, add the following below this CDDL HEADER, with the
16209720Srpaulo * fields enclosed by brackets "[]" replaced with your own identifying
17209720Srpaulo * information: Portions Copyright [yyyy] [name of copyright owner]
18209720Srpaulo *
19209720Srpaulo * CDDL HEADER END
20209720Srpaulo */
21209720Srpaulo
22209720Srpaulo/*
23209720Srpaulo * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24209720Srpaulo * Use is subject to license terms.
25209720Srpaulo */
26209720Srpaulo
27209720Srpaulo#include <sys/fasttrap_isa.h>
28209720Srpaulo#include <sys/fasttrap_impl.h>
29209720Srpaulo#include <sys/dtrace.h>
30209720Srpaulo#include <sys/dtrace_impl.h>
31209720Srpaulo#include <sys/cmn_err.h>
32209720Srpaulo#include <sys/frame.h>
33209720Srpaulo#include <sys/stack.h>
34209720Srpaulo#include <sys/sysmacros.h>
35209720Srpaulo#include <sys/trap.h>
36209720Srpaulo
37209720Srpaulo#include <v9/sys/machpcb.h>
38209720Srpaulo#include <v9/sys/privregs.h>
39209720Srpaulo
40209720Srpaulo/*
41209720Srpaulo * Lossless User-Land Tracing on SPARC
42209720Srpaulo * -----------------------------------
43209720Srpaulo *
44209720Srpaulo * The Basic Idea
45209720Srpaulo *
46209720Srpaulo * The most important design constraint is, of course, correct execution of
47209720Srpaulo * the user thread above all else. The next most important goal is rapid
48209720Srpaulo * execution. We combine execution of instructions in user-land with
49209720Srpaulo * emulation of certain instructions in the kernel to aim for complete
50209720Srpaulo * correctness and maximal performance.
51209720Srpaulo *
52209720Srpaulo * We take advantage of the split PC/NPC architecture to speed up logical
53209720Srpaulo * single-stepping; when we copy an instruction out to the scratch space in
54209720Srpaulo * the ulwp_t structure (held in the %g7 register on SPARC), we can
55209720Srpaulo * effectively single step by setting the PC to our scratch space and leaving
56209720Srpaulo * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
59209720Srpaulo * branches, call and link instructions (call and jmpl), and the rdpc
60209720Srpaulo * instruction. These instructions cannot be executed in the manner described
61209720Srpaulo * so they must be emulated in the kernel.
62209720Srpaulo *
 * Emulation for this small set of instructions is fairly simple; the most
64209720Srpaulo * difficult part being emulating branch conditions.
65209720Srpaulo *
66209720Srpaulo *
67209720Srpaulo * A Cache Heavy Portfolio
68209720Srpaulo *
69209720Srpaulo * It's important to note at this time that copying an instruction out to the
70209720Srpaulo * ulwp_t scratch space in user-land is rather complicated. SPARC has
71209720Srpaulo * separate data and instruction caches so any writes to the D$ (using a
72209720Srpaulo * store instruction for example) aren't necessarily reflected in the I$.
73209720Srpaulo * The flush instruction can be used to synchronize the two and must be used
74209720Srpaulo * for any self-modifying code, but the flush instruction only applies to the
75209720Srpaulo * primary address space (the absence of a flusha analogue to the flush
76209720Srpaulo * instruction that accepts an ASI argument is an obvious omission from SPARC
77209720Srpaulo * v9 where the notion of the alternate address space was introduced on
78209720Srpaulo * SPARC). To correctly copy out the instruction we must use a block store
79209720Srpaulo * that doesn't allocate in the D$ and ensures synchronization with the I$;
80209720Srpaulo * see dtrace_blksuword32() for the implementation  (this function uses
81209720Srpaulo * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
82209720Srpaulo * described). Refer to the UltraSPARC I/II manual for details on the
83209720Srpaulo * ASI_BLK_COMMIT_S ASI.
84209720Srpaulo *
85209720Srpaulo *
86209720Srpaulo * Return Subtleties
87209720Srpaulo *
88209720Srpaulo * When we're firing a return probe we need to expose the value returned by
89209720Srpaulo * the function being traced. Since the function can set the return value
90209720Srpaulo * in its last instruction, we need to fire the return probe only _after_
91209720Srpaulo * the effects of the instruction are apparent. For instructions that we
92209720Srpaulo * emulate, we can call dtrace_probe() after we've performed the emulation;
93209720Srpaulo * for instructions that we execute after we return to user-land, we set
94209720Srpaulo * %pc to the instruction we copied out (as described above) and set %npc
95209720Srpaulo * to a trap instruction stashed in the ulwp_t structure. After the traced
96209720Srpaulo * instruction is executed, the trap instruction returns control to the
97209720Srpaulo * kernel where we can fire the return probe.
98209720Srpaulo *
99209720Srpaulo * This need for a second trap in cases where we execute the traced
100209720Srpaulo * instruction makes it all the more important to emulate the most common
101209720Srpaulo * instructions to avoid the second trip in and out of the kernel.
102209720Srpaulo *
103209720Srpaulo *
104209720Srpaulo * Making it Fast
105209720Srpaulo *
106209720Srpaulo * Since copying out an instruction is neither simple nor inexpensive for the
107209720Srpaulo * CPU, we should attempt to avoid doing it in as many cases as possible.
108209720Srpaulo * Since function entry and return are usually the most interesting probe
109209720Srpaulo * sites, we attempt to tune the performance of the fasttrap provider around
110209720Srpaulo * instructions typically in those places.
111209720Srpaulo *
112209720Srpaulo * Looking at a bunch of functions in libraries and executables reveals that
113209720Srpaulo * most functions begin with either a save or a sethi (to setup a larger
114209720Srpaulo * argument to the save) and end with a restore or an or (in the case of leaf
115209720Srpaulo * functions). To try to improve performance, we emulate all of these
116209720Srpaulo * instructions in the kernel.
117209720Srpaulo *
118209720Srpaulo * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
120209720Srpaulo * register windows from the kernel, we emulate the implicit add that takes
121209720Srpaulo * place as part of those instructions and set the %pc to point to a simple
122209720Srpaulo * save or restore we've hidden in the ulwp_t structure. If we're in a return
123209720Srpaulo * probe so want to make it seem as though the tracepoint has been completely
124209720Srpaulo * executed we need to remember that we've pulled this trick with restore and
125209720Srpaulo * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one. This
127209720Srpaulo * is why in the case of emulating a restore we set the DTrace CPU flag
128209720Srpaulo * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
129209720Srpaulo * (see fasttrap_return_common()).
130209720Srpaulo */
131209720Srpaulo
/*
 * Field extraction macros for an instruction word x. Each macro shifts the
 * field of interest down to bit 0 and masks it to the field's width:
 *
 *	OP	bits 31:30	(no mask is applied, so x is assumed to be an
 *				unsigned 32-bit quantity -- TODO confirm)
 *	OP2	bits 24:22
 *	OP3	bits 24:19
 *	RCOND	bits 27:25
 *	COND	bits 28:25
 *	A	bit  29
 *	I	bit  13
 *	RD	bits 29:25
 *	RS1	bits 18:14
 *	RS2	bits  4:0
 *	CC	bits 21:20
 *	DISP16	bits 21:20 moved to 15:14, combined with bits 13:0
 *	DISP22	bits 21:0
 *	DISP19	bits 18:0
 *	DISP30	bits 29:0
 *	SW_TRAP	bits  6:0
 *
 * None of the DISPxx macros sign-extend the displacement they extract.
 */
#define	OP(x)		((x) >> 30)
#define	OP2(x)		(((x) >> 22) & 0x07)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RCOND(x)	(((x) >> 25) & 0x07)
#define	COND(x)		(((x) >> 25) & 0x0f)
#define	A(x)		(((x) >> 29) & 0x01)
#define	I(x)		(((x) >> 13) & 0x01)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	RS2(x)		(((x) >> 0) & 0x1f)
#define	CC(x)		(((x) >> 20) & 0x03)
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)

/*
 * Values of the OP3() field for the instructions named in the macro.
 */
#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

/*
 * More OP3() values. Note that OP3_CASA and OP3_PREFETCHA have the same
 * numeric values as OP3_SAVE and OP3_RESTORE above; presumably the two groups
 * are told apart by the OP() field -- TODO confirm against the users of
 * these constants.
 */
#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

/*
 * Values of the OP2() field for the instructions named in the macro.
 */
#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

/*
 * Register numbers as they appear in the RD/RS1/RS2 fields and as they are
 * passed to fasttrap_getreg() and fasttrap_putreg(). An %o register number
 * plus 16 is the corresponding %i register number (R_O0 + 16 == R_I0).
 */
#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27
#define	R_I4		28

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 *
 * These are byte offsets that are added to the thread's %g7 value to form
 * the address of an instruction stashed in user-land (see the uses of
 * FASTTRAP_OFF_SAVE, FASTTRAP_OFF_RESTORE and FASTTRAP_OFF_RETURN in
 * fasttrap_pid_probe()).
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */
189209720Srpaulo
/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 *
 * The default of 1 leaves the optimization enabled. The code that consults
 * this flag is not in this part of the file; presumably a value of 0 keeps
 * save instructions from being classified for the FASTTRAP_T_SAVE emulation
 * in fasttrap_pid_probe() -- TODO confirm.
 */
int fasttrap_optimize_save = 1;
196209720Srpaulo
197209720Srpaulostatic uint64_t
198209720Srpaulofasttrap_anarg(struct regs *rp, int argno)
199209720Srpaulo{
200209720Srpaulo	uint64_t value;
201209720Srpaulo
202209720Srpaulo	if (argno < 6)
203209720Srpaulo		return ((&rp->r_o0)[argno]);
204209720Srpaulo
205209720Srpaulo	if (curproc->p_model == DATAMODEL_NATIVE) {
206209720Srpaulo		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
207209720Srpaulo
208209720Srpaulo		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
209209720Srpaulo		value = dtrace_fulword(&fr->fr_argd[argno]);
210209720Srpaulo		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
211209720Srpaulo		    CPU_DTRACE_BADALIGN);
212209720Srpaulo	} else {
213209720Srpaulo		struct frame32 *fr = (struct frame32 *)rp->r_sp;
214209720Srpaulo
215209720Srpaulo		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
216209720Srpaulo		value = dtrace_fuword32(&fr->fr_argd[argno]);
217209720Srpaulo		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
218209720Srpaulo		    CPU_DTRACE_BADALIGN);
219209720Srpaulo	}
220209720Srpaulo
221209720Srpaulo	return (value);
222209720Srpaulo}
223209720Srpaulo
224209720Srpaulostatic ulong_t fasttrap_getreg(struct regs *, uint_t);
225209720Srpaulostatic void fasttrap_putreg(struct regs *, uint_t, ulong_t);
226209720Srpaulo
227209720Srpaulostatic void
228209720Srpaulofasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
229209720Srpaulo    uint_t fake_restore, int argc, uintptr_t *argv)
230209720Srpaulo{
231209720Srpaulo	int i, x, cap = MIN(argc, probe->ftp_nargs);
232209720Srpaulo	int inc = (fake_restore ? 16 : 0);
233209720Srpaulo
234209720Srpaulo	/*
235209720Srpaulo	 * The only way we'll hit the fake_restore case is if a USDT probe is
236209720Srpaulo	 * invoked as a tail-call. While it wouldn't be incorrect, we can
237209720Srpaulo	 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
238209720Srpaulo	 * directly since a tail-call can't be made if the invoked function
239209720Srpaulo	 * would use the argument dump space (i.e. if there were more than
240209720Srpaulo	 * 6 arguments). We take this shortcut because unconditionally rooting
241209720Srpaulo	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
242209720Srpaulo	 */
243209720Srpaulo
244209720Srpaulo	if (curproc->p_model == DATAMODEL_NATIVE) {
245209720Srpaulo		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
246209720Srpaulo		uintptr_t v;
247209720Srpaulo
248209720Srpaulo		for (i = 0; i < cap; i++) {
249209720Srpaulo			x = probe->ftp_argmap[i];
250209720Srpaulo
251209720Srpaulo			if (x < 6)
252209720Srpaulo				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
253209720Srpaulo			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
254209720Srpaulo				argv[i] = 0;
255209720Srpaulo		}
256209720Srpaulo
257209720Srpaulo	} else {
258209720Srpaulo		struct frame32 *fr = (struct frame32 *)rp->r_sp;
259209720Srpaulo		uint32_t v;
260209720Srpaulo
261209720Srpaulo		for (i = 0; i < cap; i++) {
262209720Srpaulo			x = probe->ftp_argmap[i];
263209720Srpaulo
264209720Srpaulo			if (x < 6)
265209720Srpaulo				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
266209720Srpaulo			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
267209720Srpaulo				argv[i] = 0;
268209720Srpaulo		}
269209720Srpaulo	}
270209720Srpaulo
271209720Srpaulo	for (; i < argc; i++) {
272209720Srpaulo		argv[i] = 0;
273209720Srpaulo	}
274209720Srpaulo}
275209720Srpaulo
276209720Srpaulostatic void
277209720Srpaulofasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
278209720Srpaulo    uint_t fake_restore)
279209720Srpaulo{
280209720Srpaulo	fasttrap_tracepoint_t *tp;
281209720Srpaulo	fasttrap_bucket_t *bucket;
282209720Srpaulo	fasttrap_id_t *id;
283209720Srpaulo	kmutex_t *pid_mtx;
284209720Srpaulo	dtrace_icookie_t cookie;
285209720Srpaulo
286209720Srpaulo	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
287209720Srpaulo	mutex_enter(pid_mtx);
288209720Srpaulo	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
289209720Srpaulo
290209720Srpaulo	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
291209720Srpaulo		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
292209720Srpaulo		    tp->ftt_proc->ftpc_acount != 0)
293209720Srpaulo			break;
294209720Srpaulo	}
295209720Srpaulo
296209720Srpaulo	/*
297209720Srpaulo	 * Don't sweat it if we can't find the tracepoint again; unlike
298209720Srpaulo	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
299209720Srpaulo	 * is not essential to the correct execution of the process.
300209720Srpaulo	 */
301209720Srpaulo	if (tp == NULL || tp->ftt_retids == NULL) {
302209720Srpaulo		mutex_exit(pid_mtx);
303209720Srpaulo		return;
304209720Srpaulo	}
305209720Srpaulo
306209720Srpaulo	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
307209720Srpaulo		fasttrap_probe_t *probe = id->fti_probe;
308209720Srpaulo
309209720Srpaulo		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
310209720Srpaulo			if (probe->ftp_argmap != NULL && fake_restore) {
311209720Srpaulo				uintptr_t t[5];
312209720Srpaulo
313209720Srpaulo				fasttrap_usdt_args(probe, rp, fake_restore,
314209720Srpaulo				    sizeof (t) / sizeof (t[0]), t);
315209720Srpaulo
316209720Srpaulo				cookie = dtrace_interrupt_disable();
317209720Srpaulo				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
318209720Srpaulo				dtrace_probe(probe->ftp_id, t[0], t[1],
319209720Srpaulo				    t[2], t[3], t[4]);
320209720Srpaulo				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
321209720Srpaulo				dtrace_interrupt_enable(cookie);
322209720Srpaulo
323209720Srpaulo			} else if (probe->ftp_argmap != NULL) {
324209720Srpaulo				uintptr_t t[5];
325209720Srpaulo
326209720Srpaulo				fasttrap_usdt_args(probe, rp, fake_restore,
327209720Srpaulo				    sizeof (t) / sizeof (t[0]), t);
328209720Srpaulo
329209720Srpaulo				dtrace_probe(probe->ftp_id, t[0], t[1],
330209720Srpaulo				    t[2], t[3], t[4]);
331209720Srpaulo
332209720Srpaulo			} else if (fake_restore) {
333209720Srpaulo				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
334209720Srpaulo				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
335209720Srpaulo				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
336209720Srpaulo				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
337209720Srpaulo				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
338209720Srpaulo
339209720Srpaulo				cookie = dtrace_interrupt_disable();
340209720Srpaulo				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
341209720Srpaulo				dtrace_probe(probe->ftp_id, arg0, arg1,
342209720Srpaulo				    arg2, arg3, arg4);
343209720Srpaulo				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
344209720Srpaulo				dtrace_interrupt_enable(cookie);
345209720Srpaulo
346209720Srpaulo			} else {
347209720Srpaulo				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
348209720Srpaulo				    rp->r_o2, rp->r_o3, rp->r_o4);
349209720Srpaulo			}
350209720Srpaulo
351209720Srpaulo			continue;
352209720Srpaulo		}
353209720Srpaulo
354209720Srpaulo		/*
355209720Srpaulo		 * If this is only a possible return point, we must
356209720Srpaulo		 * be looking at a potential tail call in leaf context.
357209720Srpaulo		 * If the %npc is still within this function, then we
358209720Srpaulo		 * must have misidentified a jmpl as a tail-call when it
359209720Srpaulo		 * is, in fact, part of a jump table. It would be nice to
360209720Srpaulo		 * remove this tracepoint, but this is neither the time
361209720Srpaulo		 * nor the place.
362209720Srpaulo		 */
363209720Srpaulo		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
364209720Srpaulo		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
365209720Srpaulo			continue;
366209720Srpaulo
367209720Srpaulo		/*
368209720Srpaulo		 * It's possible for a function to branch to the delay slot
369209720Srpaulo		 * of an instruction that we've identified as a return site.
370209720Srpaulo		 * We can dectect this spurious return probe activation by
371209720Srpaulo		 * observing that in this case %npc will be %pc + 4 and %npc
372209720Srpaulo		 * will be inside the current function (unless the user is
373209720Srpaulo		 * doing _crazy_ instruction picking in which case there's
374209720Srpaulo		 * very little we can do). The second check is important
375209720Srpaulo		 * in case the last instructions of a function make a tail-
376209720Srpaulo		 * call to the function located immediately subsequent.
377209720Srpaulo		 */
378209720Srpaulo		if (rp->r_npc == rp->r_pc + 4 &&
379209720Srpaulo		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
380209720Srpaulo			continue;
381209720Srpaulo
382209720Srpaulo		/*
383209720Srpaulo		 * The first argument is the offset of return tracepoint
384209720Srpaulo		 * in the function; the remaining arguments are the return
385209720Srpaulo		 * values.
386209720Srpaulo		 *
387209720Srpaulo		 * If fake_restore is set, we need to pull the return values
388209720Srpaulo		 * out of the %i's rather than the %o's -- a little trickier.
389209720Srpaulo		 */
390209720Srpaulo		if (!fake_restore) {
391209720Srpaulo			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
392209720Srpaulo			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
393209720Srpaulo		} else {
394209720Srpaulo			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
395209720Srpaulo			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
396209720Srpaulo			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
397209720Srpaulo			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
398209720Srpaulo
399209720Srpaulo			cookie = dtrace_interrupt_disable();
400209720Srpaulo			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
401209720Srpaulo			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
402209720Srpaulo			    arg0, arg1, arg2, arg3);
403209720Srpaulo			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
404209720Srpaulo			dtrace_interrupt_enable(cookie);
405209720Srpaulo		}
406209720Srpaulo	}
407209720Srpaulo
408209720Srpaulo	mutex_exit(pid_mtx);
409209720Srpaulo}
410209720Srpaulo
411209720Srpauloint
412209720Srpaulofasttrap_pid_probe(struct regs *rp)
413209720Srpaulo{
414209720Srpaulo	proc_t *p = curproc;
415209720Srpaulo	fasttrap_tracepoint_t *tp, tp_local;
416209720Srpaulo	fasttrap_id_t *id;
417209720Srpaulo	pid_t pid;
418209720Srpaulo	uintptr_t pc = rp->r_pc;
419209720Srpaulo	uintptr_t npc = rp->r_npc;
420209720Srpaulo	uintptr_t orig_pc = pc;
421209720Srpaulo	fasttrap_bucket_t *bucket;
422209720Srpaulo	kmutex_t *pid_mtx;
423209720Srpaulo	uint_t fake_restore = 0, is_enabled = 0;
424209720Srpaulo	dtrace_icookie_t cookie;
425209720Srpaulo
426209720Srpaulo	/*
427209720Srpaulo	 * It's possible that a user (in a veritable orgy of bad planning)
428209720Srpaulo	 * could redirect this thread's flow of control before it reached the
429209720Srpaulo	 * return probe fasttrap. In this case we need to kill the process
430209720Srpaulo	 * since it's in a unrecoverable state.
431209720Srpaulo	 */
432209720Srpaulo	if (curthread->t_dtrace_step) {
433209720Srpaulo		ASSERT(curthread->t_dtrace_on);
434209720Srpaulo		fasttrap_sigtrap(p, curthread, pc);
435209720Srpaulo		return (0);
436209720Srpaulo	}
437209720Srpaulo
438209720Srpaulo	/*
439209720Srpaulo	 * Clear all user tracing flags.
440209720Srpaulo	 */
441209720Srpaulo	curthread->t_dtrace_ft = 0;
442209720Srpaulo	curthread->t_dtrace_pc = 0;
443209720Srpaulo	curthread->t_dtrace_npc = 0;
444209720Srpaulo	curthread->t_dtrace_scrpc = 0;
445209720Srpaulo	curthread->t_dtrace_astpc = 0;
446209720Srpaulo
447209720Srpaulo	/*
448209720Srpaulo	 * Treat a child created by a call to vfork(2) as if it were its
449209720Srpaulo	 * parent. We know that there's only one thread of control in such a
450209720Srpaulo	 * process: this one.
451209720Srpaulo	 */
452209720Srpaulo	while (p->p_flag & SVFORK) {
453209720Srpaulo		p = p->p_parent;
454209720Srpaulo	}
455209720Srpaulo
456209720Srpaulo	pid = p->p_pid;
457209720Srpaulo	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
458209720Srpaulo	mutex_enter(pid_mtx);
459209720Srpaulo	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
460209720Srpaulo
461209720Srpaulo	/*
462209720Srpaulo	 * Lookup the tracepoint that the process just hit.
463209720Srpaulo	 */
464209720Srpaulo	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
465209720Srpaulo		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
466209720Srpaulo		    tp->ftt_proc->ftpc_acount != 0)
467209720Srpaulo			break;
468209720Srpaulo	}
469209720Srpaulo
470209720Srpaulo	/*
471209720Srpaulo	 * If we couldn't find a matching tracepoint, either a tracepoint has
472209720Srpaulo	 * been inserted without using the pid<pid> ioctl interface (see
473209720Srpaulo	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
474209720Srpaulo	 */
475209720Srpaulo	if (tp == NULL) {
476209720Srpaulo		mutex_exit(pid_mtx);
477209720Srpaulo		return (-1);
478209720Srpaulo	}
479209720Srpaulo
480209720Srpaulo	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
481209720Srpaulo		fasttrap_probe_t *probe = id->fti_probe;
482209720Srpaulo		int isentry = (id->fti_ptype == DTFTP_ENTRY);
483209720Srpaulo
484209720Srpaulo		if (id->fti_ptype == DTFTP_IS_ENABLED) {
485209720Srpaulo			is_enabled = 1;
486209720Srpaulo			continue;
487209720Srpaulo		}
488209720Srpaulo
489209720Srpaulo		/*
490209720Srpaulo		 * We note that this was an entry probe to help ustack() find
491209720Srpaulo		 * the first caller.
492209720Srpaulo		 */
493209720Srpaulo		if (isentry) {
494209720Srpaulo			cookie = dtrace_interrupt_disable();
495209720Srpaulo			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
496209720Srpaulo		}
497209720Srpaulo		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
498209720Srpaulo		    rp->r_o3, rp->r_o4);
499209720Srpaulo		if (isentry) {
500209720Srpaulo			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
501209720Srpaulo			dtrace_interrupt_enable(cookie);
502209720Srpaulo		}
503209720Srpaulo	}
504209720Srpaulo
505209720Srpaulo	/*
506209720Srpaulo	 * We're about to do a bunch of work so we cache a local copy of
507209720Srpaulo	 * the tracepoint to emulate the instruction, and then find the
508209720Srpaulo	 * tracepoint again later if we need to light up any return probes.
509209720Srpaulo	 */
510209720Srpaulo	tp_local = *tp;
511209720Srpaulo	mutex_exit(pid_mtx);
512209720Srpaulo	tp = &tp_local;
513209720Srpaulo
514209720Srpaulo	/*
515209720Srpaulo	 * If there's an is-enabled probe conntected to this tracepoint it
516209720Srpaulo	 * means that there was a 'mov %g0, %o0' instruction that was placed
517209720Srpaulo	 * there by DTrace when the binary was linked. As this probe is, in
518209720Srpaulo	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
519209720Srpaulo	 * bypass all the instruction emulation logic since we know the
520209720Srpaulo	 * inevitable result. It's possible that a user could construct a
521209720Srpaulo	 * scenario where the 'is-enabled' probe was on some other
522209720Srpaulo	 * instruction, but that would be a rather exotic way to shoot oneself
523209720Srpaulo	 * in the foot.
524209720Srpaulo	 */
525209720Srpaulo	if (is_enabled) {
526209720Srpaulo		rp->r_o0 = 1;
527209720Srpaulo		pc = rp->r_npc;
528209720Srpaulo		npc = pc + 4;
529209720Srpaulo		goto done;
530209720Srpaulo	}
531209720Srpaulo
532209720Srpaulo	/*
533209720Srpaulo	 * We emulate certain types of instructions to ensure correctness
534209720Srpaulo	 * (in the case of position dependent instructions) or optimize
535209720Srpaulo	 * common cases. The rest we have the thread execute back in user-
536209720Srpaulo	 * land.
537209720Srpaulo	 */
538209720Srpaulo	switch (tp->ftt_type) {
539209720Srpaulo	case FASTTRAP_T_SAVE:
540209720Srpaulo	{
541209720Srpaulo		int32_t imm;
542209720Srpaulo
543209720Srpaulo		/*
544209720Srpaulo		 * This an optimization to let us handle function entry
545209720Srpaulo		 * probes more efficiently. Many functions begin with a save
546209720Srpaulo		 * instruction that follows the pattern:
547209720Srpaulo		 *	save	%sp, <imm>, %sp
548209720Srpaulo		 *
549209720Srpaulo		 * Meanwhile, we've stashed the instruction:
550209720Srpaulo		 *	save	%g1, %g0, %sp
551209720Srpaulo		 *
552209720Srpaulo		 * off of %g7, so all we have to do is stick the right value
553209720Srpaulo		 * into %g1 and reset %pc to point to the instruction we've
554209720Srpaulo		 * cleverly hidden (%npc should not be touched).
555209720Srpaulo		 */
556209720Srpaulo
557209720Srpaulo		imm = tp->ftt_instr << 19;
558209720Srpaulo		imm >>= 19;
559209720Srpaulo		rp->r_g1 = rp->r_sp + imm;
560209720Srpaulo		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
561209720Srpaulo		break;
562209720Srpaulo	}
563209720Srpaulo
564209720Srpaulo	case FASTTRAP_T_RESTORE:
565209720Srpaulo	{
566209720Srpaulo		ulong_t value;
567209720Srpaulo		uint_t rd;
568209720Srpaulo
569209720Srpaulo		/*
570209720Srpaulo		 * This is an optimization to let us handle function
571209720Srpaulo		 * return probes more efficiently. Most non-leaf functions
572209720Srpaulo		 * end with the sequence:
573209720Srpaulo		 *	ret
574209720Srpaulo		 *	restore	<reg>, <reg_or_imm>, %oX
575209720Srpaulo		 *
576209720Srpaulo		 * We've stashed the instruction:
577209720Srpaulo		 *	restore	%g0, %g0, %g0
578209720Srpaulo		 *
579209720Srpaulo		 * off of %g7 so we just need to place the correct value
580209720Srpaulo		 * in the right %i register (since after our fake-o
581209720Srpaulo		 * restore, the %i's will become the %o's) and set the %pc
582209720Srpaulo		 * to point to our hidden restore. We also set fake_restore to
583209720Srpaulo		 * let fasttrap_return_common() know that it will find the
584209720Srpaulo		 * return values in the %i's rather than the %o's.
585209720Srpaulo		 */
586209720Srpaulo
587209720Srpaulo		if (I(tp->ftt_instr)) {
588209720Srpaulo			int32_t imm;
589209720Srpaulo
590209720Srpaulo			imm = tp->ftt_instr << 19;
591209720Srpaulo			imm >>= 19;
592209720Srpaulo			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
593209720Srpaulo		} else {
594209720Srpaulo			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
595209720Srpaulo			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
596209720Srpaulo		}
597209720Srpaulo
598209720Srpaulo		/*
599209720Srpaulo		 * Convert %o's to %i's; leave %g's as they are.
600209720Srpaulo		 */
601209720Srpaulo		rd = RD(tp->ftt_instr);
602209720Srpaulo		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
603209720Srpaulo
604209720Srpaulo		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
605209720Srpaulo		fake_restore = 1;
606209720Srpaulo		break;
607209720Srpaulo	}
608209720Srpaulo
609209720Srpaulo	case FASTTRAP_T_RETURN:
610209720Srpaulo	{
611209720Srpaulo		uintptr_t target;
612209720Srpaulo
613209720Srpaulo		/*
614209720Srpaulo		 * A return instruction is like a jmpl (without the link
615209720Srpaulo		 * part) that executes an implicit restore. We've stashed
616209720Srpaulo		 * the instruction:
617209720Srpaulo		 *	return %o0
618209720Srpaulo		 *
619209720Srpaulo		 * off of %g7 so we just need to place the target in %o0
620209720Srpaulo		 * and set the %pc to point to the stashed return instruction.
621209720Srpaulo		 * We use %o0 since that register disappears after the return
622209720Srpaulo		 * executes, erasing any evidence of this tampering.
623209720Srpaulo		 */
624209720Srpaulo		if (I(tp->ftt_instr)) {
625209720Srpaulo			int32_t imm;
626209720Srpaulo
627209720Srpaulo			imm = tp->ftt_instr << 19;
628209720Srpaulo			imm >>= 19;
629209720Srpaulo			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
630209720Srpaulo		} else {
631209720Srpaulo			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
632209720Srpaulo			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
633209720Srpaulo		}
634209720Srpaulo
635209720Srpaulo		fasttrap_putreg(rp, R_O0, target);
636209720Srpaulo
637209720Srpaulo		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
638209720Srpaulo		fake_restore = 1;
639209720Srpaulo		break;
640209720Srpaulo	}
641209720Srpaulo
642209720Srpaulo	case FASTTRAP_T_OR:
643209720Srpaulo	{
644209720Srpaulo		ulong_t value;
645209720Srpaulo
646209720Srpaulo		if (I(tp->ftt_instr)) {
647209720Srpaulo			int32_t imm;
648209720Srpaulo
649209720Srpaulo			imm = tp->ftt_instr << 19;
650209720Srpaulo			imm >>= 19;
651209720Srpaulo			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
652209720Srpaulo		} else {
653209720Srpaulo			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
654209720Srpaulo			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
655209720Srpaulo		}
656209720Srpaulo
657209720Srpaulo		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
658209720Srpaulo		pc = rp->r_npc;
659209720Srpaulo		npc = pc + 4;
660209720Srpaulo		break;
661209720Srpaulo	}
662209720Srpaulo
663209720Srpaulo	case FASTTRAP_T_SETHI:
664209720Srpaulo		if (RD(tp->ftt_instr) != R_G0) {
665209720Srpaulo			uint32_t imm32 = tp->ftt_instr << 10;
666209720Srpaulo			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
667209720Srpaulo		}
668209720Srpaulo		pc = rp->r_npc;
669209720Srpaulo		npc = pc + 4;
670209720Srpaulo		break;
671209720Srpaulo
672209720Srpaulo	case FASTTRAP_T_CCR:
673209720Srpaulo	{
674209720Srpaulo		uint_t c, v, z, n, taken;
675209720Srpaulo		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
676209720Srpaulo
677209720Srpaulo		if (tp->ftt_cc != 0)
678209720Srpaulo			ccr >>= 4;
679209720Srpaulo
680209720Srpaulo		c = (ccr >> 0) & 1;
681209720Srpaulo		v = (ccr >> 1) & 1;
682209720Srpaulo		z = (ccr >> 2) & 1;
683209720Srpaulo		n = (ccr >> 3) & 1;
684209720Srpaulo
685209720Srpaulo		switch (tp->ftt_code) {
686209720Srpaulo		case 0x0:	/* BN */
687209720Srpaulo			taken = 0;		break;
688209720Srpaulo		case 0x1:	/* BE */
689209720Srpaulo			taken = z;		break;
690209720Srpaulo		case 0x2:	/* BLE */
691209720Srpaulo			taken = z | (n ^ v);	break;
692209720Srpaulo		case 0x3:	/* BL */
693209720Srpaulo			taken = n ^ v;		break;
694209720Srpaulo		case 0x4:	/* BLEU */
695209720Srpaulo			taken = c | z;		break;
696209720Srpaulo		case 0x5:	/* BCS (BLU) */
697209720Srpaulo			taken = c;		break;
698209720Srpaulo		case 0x6:	/* BNEG */
699209720Srpaulo			taken = n;		break;
700209720Srpaulo		case 0x7:	/* BVS */
701209720Srpaulo			taken = v;		break;
702209720Srpaulo		case 0x8:	/* BA */
703209720Srpaulo			/*
704209720Srpaulo			 * We handle the BA case differently since the annul
705209720Srpaulo			 * bit means something slightly different.
706209720Srpaulo			 */
707209720Srpaulo			panic("fasttrap: mishandled a branch");
708209720Srpaulo			taken = 1;		break;
709209720Srpaulo		case 0x9:	/* BNE */
710209720Srpaulo			taken = ~z;		break;
711209720Srpaulo		case 0xa:	/* BG */
712209720Srpaulo			taken = ~(z | (n ^ v));	break;
713209720Srpaulo		case 0xb:	/* BGE */
714209720Srpaulo			taken = ~(n ^ v);	break;
715209720Srpaulo		case 0xc:	/* BGU */
716209720Srpaulo			taken = ~(c | z);	break;
717209720Srpaulo		case 0xd:	/* BCC (BGEU) */
718209720Srpaulo			taken = ~c;		break;
719209720Srpaulo		case 0xe:	/* BPOS */
720209720Srpaulo			taken = ~n;		break;
721209720Srpaulo		case 0xf:	/* BVC */
722209720Srpaulo			taken = ~v;		break;
723209720Srpaulo		}
724209720Srpaulo
725209720Srpaulo		if (taken & 1) {
726209720Srpaulo			pc = rp->r_npc;
727209720Srpaulo			npc = tp->ftt_dest;
728209720Srpaulo		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
729209720Srpaulo			/*
730209720Srpaulo			 * Untaken annulled branches don't execute the
731209720Srpaulo			 * instruction in the delay slot.
732209720Srpaulo			 */
733209720Srpaulo			pc = rp->r_npc + 4;
734209720Srpaulo			npc = pc + 4;
735209720Srpaulo		} else {
736209720Srpaulo			pc = rp->r_npc;
737209720Srpaulo			npc = pc + 4;
738209720Srpaulo		}
739209720Srpaulo		break;
740209720Srpaulo	}
741209720Srpaulo
742209720Srpaulo	case FASTTRAP_T_FCC:
743209720Srpaulo	{
744209720Srpaulo		uint_t fcc;
745209720Srpaulo		uint_t taken;
746209720Srpaulo		uint64_t fsr;
747209720Srpaulo
748209720Srpaulo		dtrace_getfsr(&fsr);
749209720Srpaulo
750209720Srpaulo		if (tp->ftt_cc == 0) {
751209720Srpaulo			fcc = (fsr >> 10) & 0x3;
752209720Srpaulo		} else {
753209720Srpaulo			uint_t shift;
754209720Srpaulo			ASSERT(tp->ftt_cc <= 3);
755209720Srpaulo			shift = 30 + tp->ftt_cc * 2;
756209720Srpaulo			fcc = (fsr >> shift) & 0x3;
757209720Srpaulo		}
758209720Srpaulo
759209720Srpaulo		switch (tp->ftt_code) {
760209720Srpaulo		case 0x0:	/* FBN */
761209720Srpaulo			taken = (1 << fcc) & (0|0|0|0);	break;
762209720Srpaulo		case 0x1:	/* FBNE */
763209720Srpaulo			taken = (1 << fcc) & (8|4|2|0);	break;
764209720Srpaulo		case 0x2:	/* FBLG */
765209720Srpaulo			taken = (1 << fcc) & (0|4|2|0);	break;
766209720Srpaulo		case 0x3:	/* FBUL */
767209720Srpaulo			taken = (1 << fcc) & (8|0|2|0);	break;
768209720Srpaulo		case 0x4:	/* FBL */
769209720Srpaulo			taken = (1 << fcc) & (0|0|2|0);	break;
770209720Srpaulo		case 0x5:	/* FBUG */
771209720Srpaulo			taken = (1 << fcc) & (8|4|0|0);	break;
772209720Srpaulo		case 0x6:	/* FBG */
773209720Srpaulo			taken = (1 << fcc) & (0|4|0|0);	break;
774209720Srpaulo		case 0x7:	/* FBU */
775209720Srpaulo			taken = (1 << fcc) & (8|0|0|0);	break;
776209720Srpaulo		case 0x8:	/* FBA */
777209720Srpaulo			/*
778209720Srpaulo			 * We handle the FBA case differently since the annul
779209720Srpaulo			 * bit means something slightly different.
780209720Srpaulo			 */
781209720Srpaulo			panic("fasttrap: mishandled a branch");
782209720Srpaulo			taken = (1 << fcc) & (8|4|2|1);	break;
783209720Srpaulo		case 0x9:	/* FBE */
784209720Srpaulo			taken = (1 << fcc) & (0|0|0|1);	break;
785209720Srpaulo		case 0xa:	/* FBUE */
786209720Srpaulo			taken = (1 << fcc) & (8|0|0|1);	break;
787209720Srpaulo		case 0xb:	/* FBGE */
788209720Srpaulo			taken = (1 << fcc) & (0|4|0|1);	break;
789209720Srpaulo		case 0xc:	/* FBUGE */
790209720Srpaulo			taken = (1 << fcc) & (8|4|0|1);	break;
791209720Srpaulo		case 0xd:	/* FBLE */
792209720Srpaulo			taken = (1 << fcc) & (0|0|2|1);	break;
793209720Srpaulo		case 0xe:	/* FBULE */
794209720Srpaulo			taken = (1 << fcc) & (8|0|2|1);	break;
795209720Srpaulo		case 0xf:	/* FBO */
796209720Srpaulo			taken = (1 << fcc) & (0|4|2|1);	break;
797209720Srpaulo		}
798209720Srpaulo
799209720Srpaulo		if (taken) {
800209720Srpaulo			pc = rp->r_npc;
801209720Srpaulo			npc = tp->ftt_dest;
802209720Srpaulo		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
803209720Srpaulo			/*
804209720Srpaulo			 * Untaken annulled branches don't execute the
805209720Srpaulo			 * instruction in the delay slot.
806209720Srpaulo			 */
807209720Srpaulo			pc = rp->r_npc + 4;
808209720Srpaulo			npc = pc + 4;
809209720Srpaulo		} else {
810209720Srpaulo			pc = rp->r_npc;
811209720Srpaulo			npc = pc + 4;
812209720Srpaulo		}
813209720Srpaulo		break;
814209720Srpaulo	}
815209720Srpaulo
816209720Srpaulo	case FASTTRAP_T_REG:
817209720Srpaulo	{
818209720Srpaulo		int64_t value;
819209720Srpaulo		uint_t taken;
820209720Srpaulo		uint_t reg = RS1(tp->ftt_instr);
821209720Srpaulo
822209720Srpaulo		/*
823209720Srpaulo		 * An ILP32 process shouldn't be using a branch predicated on
824209720Srpaulo		 * an %i or an %l since it would violate the ABI. It's a
825209720Srpaulo		 * violation of the ABI because we can't ensure deterministic
826209720Srpaulo		 * behavior. We should have identified this case when we
827209720Srpaulo		 * enabled the probe.
828209720Srpaulo		 */
829209720Srpaulo		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
830209720Srpaulo
831209720Srpaulo		value = (int64_t)fasttrap_getreg(rp, reg);
832209720Srpaulo
833209720Srpaulo		switch (tp->ftt_code) {
834209720Srpaulo		case 0x1:	/* BRZ */
835209720Srpaulo			taken = (value == 0);	break;
836209720Srpaulo		case 0x2:	/* BRLEZ */
837209720Srpaulo			taken = (value <= 0);	break;
838209720Srpaulo		case 0x3:	/* BRLZ */
839209720Srpaulo			taken = (value < 0);	break;
840209720Srpaulo		case 0x5:	/* BRNZ */
841209720Srpaulo			taken = (value != 0);	break;
842209720Srpaulo		case 0x6:	/* BRGZ */
843209720Srpaulo			taken = (value > 0);	break;
844209720Srpaulo		case 0x7:	/* BRGEZ */
845209720Srpaulo			taken = (value >= 0);	break;
846209720Srpaulo		default:
847209720Srpaulo		case 0x0:
848209720Srpaulo		case 0x4:
849209720Srpaulo			panic("fasttrap: mishandled a branch");
850209720Srpaulo		}
851209720Srpaulo
852209720Srpaulo		if (taken) {
853209720Srpaulo			pc = rp->r_npc;
854209720Srpaulo			npc = tp->ftt_dest;
855209720Srpaulo		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
856209720Srpaulo			/*
857209720Srpaulo			 * Untaken annulled branches don't execute the
858209720Srpaulo			 * instruction in the delay slot.
859209720Srpaulo			 */
860209720Srpaulo			pc = rp->r_npc + 4;
861209720Srpaulo			npc = pc + 4;
862209720Srpaulo		} else {
863209720Srpaulo			pc = rp->r_npc;
864209720Srpaulo			npc = pc + 4;
865209720Srpaulo		}
866209720Srpaulo		break;
867209720Srpaulo	}
868209720Srpaulo
869209720Srpaulo	case FASTTRAP_T_ALWAYS:
870209720Srpaulo		/*
871209720Srpaulo		 * BAs, BA,As...
872209720Srpaulo		 */
873209720Srpaulo
874209720Srpaulo		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
875209720Srpaulo			/*
876209720Srpaulo			 * Annulled branch always instructions never execute
877209720Srpaulo			 * the instruction in the delay slot.
878209720Srpaulo			 */
879209720Srpaulo			pc = tp->ftt_dest;
880209720Srpaulo			npc = tp->ftt_dest + 4;
881209720Srpaulo		} else {
882209720Srpaulo			pc = rp->r_npc;
883209720Srpaulo			npc = tp->ftt_dest;
884209720Srpaulo		}
885209720Srpaulo		break;
886209720Srpaulo
887209720Srpaulo	case FASTTRAP_T_RDPC:
888209720Srpaulo		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
889209720Srpaulo		pc = rp->r_npc;
890209720Srpaulo		npc = pc + 4;
891209720Srpaulo		break;
892209720Srpaulo
893209720Srpaulo	case FASTTRAP_T_CALL:
894209720Srpaulo		/*
895209720Srpaulo		 * It's a call _and_ link remember...
896209720Srpaulo		 */
897209720Srpaulo		rp->r_o7 = rp->r_pc;
898209720Srpaulo		pc = rp->r_npc;
899209720Srpaulo		npc = tp->ftt_dest;
900209720Srpaulo		break;
901209720Srpaulo
902209720Srpaulo	case FASTTRAP_T_JMPL:
903209720Srpaulo		pc = rp->r_npc;
904209720Srpaulo
905209720Srpaulo		if (I(tp->ftt_instr)) {
906209720Srpaulo			uint_t rs1 = RS1(tp->ftt_instr);
907209720Srpaulo			int32_t imm;
908209720Srpaulo
909209720Srpaulo			imm = tp->ftt_instr << 19;
910209720Srpaulo			imm >>= 19;
911209720Srpaulo			npc = fasttrap_getreg(rp, rs1) + imm;
912209720Srpaulo		} else {
913209720Srpaulo			uint_t rs1 = RS1(tp->ftt_instr);
914209720Srpaulo			uint_t rs2 = RS2(tp->ftt_instr);
915209720Srpaulo
916209720Srpaulo			npc = fasttrap_getreg(rp, rs1) +
917209720Srpaulo			    fasttrap_getreg(rp, rs2);
918209720Srpaulo		}
919209720Srpaulo
920209720Srpaulo		/*
921209720Srpaulo		 * Do the link part of the jump-and-link instruction.
922209720Srpaulo		 */
923209720Srpaulo		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
924209720Srpaulo
925209720Srpaulo		break;
926209720Srpaulo
927209720Srpaulo	case FASTTRAP_T_COMMON:
928209720Srpaulo	{
929209720Srpaulo		curthread->t_dtrace_scrpc = rp->r_g7;
930209720Srpaulo		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
931209720Srpaulo
932209720Srpaulo		/*
933209720Srpaulo		 * Copy the instruction to a reserved location in the
934209720Srpaulo		 * user-land thread structure, then set the PC to that
935209720Srpaulo		 * location and leave the NPC alone. We take pains to ensure
936209720Srpaulo		 * consistency in the instruction stream (See SPARC
937209720Srpaulo		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
938209720Srpaulo		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
939209720Srpaulo		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
940209720Srpaulo		 * instruction into the user's address space without
941209720Srpaulo		 * bypassing the I$. There's no AS_USER version of this ASI
942209720Srpaulo		 * (as exist for other ASIs) so we use the lofault
943209720Srpaulo		 * mechanism to catch faults.
944209720Srpaulo		 */
945209720Srpaulo		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
946209720Srpaulo			/*
947209720Srpaulo			 * If the copyout fails, then the process's state
948209720Srpaulo			 * is not consistent (the effects of the traced
949209720Srpaulo			 * instruction will never be seen). This process
950209720Srpaulo			 * cannot be allowed to continue execution.
951209720Srpaulo			 */
952209720Srpaulo			fasttrap_sigtrap(curproc, curthread, pc);
953209720Srpaulo			return (0);
954209720Srpaulo		}
955209720Srpaulo
956209720Srpaulo		curthread->t_dtrace_pc = pc;
957209720Srpaulo		curthread->t_dtrace_npc = npc;
958209720Srpaulo		curthread->t_dtrace_on = 1;
959209720Srpaulo
960209720Srpaulo		pc = curthread->t_dtrace_scrpc;
961209720Srpaulo
962209720Srpaulo		if (tp->ftt_retids != NULL) {
963209720Srpaulo			curthread->t_dtrace_step = 1;
964209720Srpaulo			curthread->t_dtrace_ret = 1;
965209720Srpaulo			npc = curthread->t_dtrace_astpc;
966209720Srpaulo		}
967209720Srpaulo		break;
968209720Srpaulo	}
969209720Srpaulo
970209720Srpaulo	default:
971209720Srpaulo		panic("fasttrap: mishandled an instruction");
972209720Srpaulo	}
973209720Srpaulo
974209720Srpaulo	/*
	 * This bit me in the ass a couple of times, so let's toss this
976209720Srpaulo	 * in as a cursory sanity check.
977209720Srpaulo	 */
978209720Srpaulo	ASSERT(pc != rp->r_g7 + 4);
979209720Srpaulo	ASSERT(pc != rp->r_g7 + 8);
980209720Srpaulo
981209720Srpaulodone:
982209720Srpaulo	/*
983209720Srpaulo	 * If there were no return probes when we first found the tracepoint,
984209720Srpaulo	 * we should feel no obligation to honor any return probes that were
985209720Srpaulo	 * subsequently enabled -- they'll just have to wait until the next
986209720Srpaulo	 * time around.
987209720Srpaulo	 */
988209720Srpaulo	if (tp->ftt_retids != NULL) {
989209720Srpaulo		/*
990209720Srpaulo		 * We need to wait until the results of the instruction are
991209720Srpaulo		 * apparent before invoking any return probes. If this
992209720Srpaulo		 * instruction was emulated we can just call
993209720Srpaulo		 * fasttrap_return_common(); if it needs to be executed, we
994209720Srpaulo		 * need to wait until we return to the kernel.
995209720Srpaulo		 */
996209720Srpaulo		if (tp->ftt_type != FASTTRAP_T_COMMON) {
997209720Srpaulo			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
998209720Srpaulo		} else {
999209720Srpaulo			ASSERT(curthread->t_dtrace_ret != 0);
1000209720Srpaulo			ASSERT(curthread->t_dtrace_pc == orig_pc);
1001209720Srpaulo			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1002209720Srpaulo			ASSERT(npc == curthread->t_dtrace_astpc);
1003209720Srpaulo		}
1004209720Srpaulo	}
1005209720Srpaulo
1006209720Srpaulo	ASSERT(pc != 0);
1007209720Srpaulo	rp->r_pc = pc;
1008209720Srpaulo	rp->r_npc = npc;
1009209720Srpaulo
1010209720Srpaulo	return (0);
1011209720Srpaulo}
1012209720Srpaulo
1013209720Srpauloint
1014209720Srpaulofasttrap_return_probe(struct regs *rp)
1015209720Srpaulo{
1016209720Srpaulo	proc_t *p = ttoproc(curthread);
1017209720Srpaulo	pid_t pid;
1018209720Srpaulo	uintptr_t pc = curthread->t_dtrace_pc;
1019209720Srpaulo	uintptr_t npc = curthread->t_dtrace_npc;
1020209720Srpaulo
1021209720Srpaulo	curthread->t_dtrace_pc = 0;
1022209720Srpaulo	curthread->t_dtrace_npc = 0;
1023209720Srpaulo	curthread->t_dtrace_scrpc = 0;
1024209720Srpaulo	curthread->t_dtrace_astpc = 0;
1025209720Srpaulo
1026209720Srpaulo	/*
1027209720Srpaulo	 * Treat a child created by a call to vfork(2) as if it were its
1028209720Srpaulo	 * parent. We know there's only one thread of control in such a
1029209720Srpaulo	 * process: this one.
1030209720Srpaulo	 */
1031209720Srpaulo	while (p->p_flag & SVFORK) {
1032209720Srpaulo		p = p->p_parent;
1033209720Srpaulo	}
1034209720Srpaulo
1035209720Srpaulo	/*
1036209720Srpaulo	 * We set the %pc and %npc to their values when the traced
1037209720Srpaulo	 * instruction was initially executed so that it appears to
1038209720Srpaulo	 * dtrace_probe() that we're on the original instruction, and so that
1039209720Srpaulo	 * the user can't easily detect our complex web of lies.
1040209720Srpaulo	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
1041209720Srpaulo	 * after we return.
1042209720Srpaulo	 */
1043209720Srpaulo	rp->r_pc = pc;
1044209720Srpaulo	rp->r_npc = npc;
1045209720Srpaulo
1046209720Srpaulo	pid = p->p_pid;
1047209720Srpaulo	fasttrap_return_common(rp, pc, pid, 0);
1048209720Srpaulo
1049209720Srpaulo	return (0);
1050209720Srpaulo}
1051209720Srpaulo
1052209720Srpauloint
1053209720Srpaulofasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1054209720Srpaulo{
1055209720Srpaulo	fasttrap_instr_t instr = FASTTRAP_INSTR;
1056209720Srpaulo
1057209720Srpaulo	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
1058209720Srpaulo		return (-1);
1059209720Srpaulo
1060209720Srpaulo	return (0);
1061209720Srpaulo}
1062209720Srpaulo
1063209720Srpauloint
1064209720Srpaulofasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1065209720Srpaulo{
1066209720Srpaulo	fasttrap_instr_t instr;
1067209720Srpaulo
1068209720Srpaulo	/*
1069209720Srpaulo	 * Distinguish between read or write failures and a changed
1070209720Srpaulo	 * instruction.
1071209720Srpaulo	 */
1072209720Srpaulo	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1073209720Srpaulo		return (0);
1074209720Srpaulo	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1075209720Srpaulo		return (0);
1076209720Srpaulo	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
1077209720Srpaulo		return (-1);
1078209720Srpaulo
1079209720Srpaulo	return (0);
1080209720Srpaulo}
1081209720Srpaulo
1082209720Srpauloint
1083209720Srpaulofasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
1084209720Srpaulo    fasttrap_probe_type_t type)
1085209720Srpaulo{
1086209720Srpaulo	uint32_t instr;
1087209720Srpaulo	int32_t disp;
1088209720Srpaulo
1089209720Srpaulo	/*
1090209720Srpaulo	 * Read the instruction at the given address out of the process's
1091209720Srpaulo	 * address space. We don't have to worry about a debugger
1092209720Srpaulo	 * changing this instruction before we overwrite it with our trap
1093209720Srpaulo	 * instruction since P_PR_LOCK is set.
1094209720Srpaulo	 */
1095209720Srpaulo	if (uread(p, &instr, 4, pc) != 0)
1096209720Srpaulo		return (-1);
1097209720Srpaulo
1098209720Srpaulo	/*
1099209720Srpaulo	 * Decode the instruction to fill in the probe flags. We can have
1100209720Srpaulo	 * the process execute most instructions on its own using a pc/npc
1101209720Srpaulo	 * trick, but pc-relative control transfer present a problem since
1102209720Srpaulo	 * we're relocating the instruction. We emulate these instructions
1103209720Srpaulo	 * in the kernel. We assume a default type and over-write that as
1104209720Srpaulo	 * needed.
1105209720Srpaulo	 *
1106209720Srpaulo	 * pc-relative instructions must be emulated for correctness;
1107209720Srpaulo	 * other instructions (which represent a large set of commonly traced
1108209720Srpaulo	 * instructions) are emulated or otherwise optimized for performance.
1109209720Srpaulo	 */
1110209720Srpaulo	tp->ftt_type = FASTTRAP_T_COMMON;
1111209720Srpaulo	if (OP(instr) == 1) {
1112209720Srpaulo		/*
1113209720Srpaulo		 * Call instructions.
1114209720Srpaulo		 */
1115209720Srpaulo		tp->ftt_type = FASTTRAP_T_CALL;
1116209720Srpaulo		disp = DISP30(instr) << 2;
1117209720Srpaulo		tp->ftt_dest = pc + (intptr_t)disp;
1118209720Srpaulo
1119209720Srpaulo	} else if (OP(instr) == 0) {
1120209720Srpaulo		/*
1121209720Srpaulo		 * Branch instructions.
1122209720Srpaulo		 *
1123209720Srpaulo		 * Unconditional branches need careful attention when they're
1124209720Srpaulo		 * annulled: annulled unconditional branches never execute
1125209720Srpaulo		 * the instruction in the delay slot.
1126209720Srpaulo		 */
1127209720Srpaulo		switch (OP2(instr)) {
1128209720Srpaulo		case OP2_ILLTRAP:
1129209720Srpaulo		case 0x7:
1130209720Srpaulo			/*
1131209720Srpaulo			 * The compiler may place an illtrap after a call to
1132209720Srpaulo			 * a function that returns a structure. In the case of
1133209720Srpaulo			 * a returned structure, the compiler places an illtrap
1134209720Srpaulo			 * whose const22 field is the size of the returned
1135209720Srpaulo			 * structure immediately following the delay slot of
1136209720Srpaulo			 * the call. To stay out of the way, we refuse to
1137209720Srpaulo			 * place tracepoints on top of illtrap instructions.
1138209720Srpaulo			 *
1139209720Srpaulo			 * This is one of the dumbest architectural decisions
1140209720Srpaulo			 * I've ever had to work around.
1141209720Srpaulo			 *
1142209720Srpaulo			 * We also identify the only illegal op2 value (See
1143209720Srpaulo			 * SPARC Architecture Manual Version 9, E.2 table 31).
1144209720Srpaulo			 */
1145209720Srpaulo			return (-1);
1146209720Srpaulo
1147209720Srpaulo		case OP2_BPcc:
1148209720Srpaulo			if (COND(instr) == 8) {
1149209720Srpaulo				tp->ftt_type = FASTTRAP_T_ALWAYS;
1150209720Srpaulo			} else {
1151209720Srpaulo				/*
1152209720Srpaulo				 * Check for an illegal instruction.
1153209720Srpaulo				 */
1154209720Srpaulo				if (CC(instr) & 1)
1155209720Srpaulo					return (-1);
1156209720Srpaulo				tp->ftt_type = FASTTRAP_T_CCR;
1157209720Srpaulo				tp->ftt_cc = CC(instr);
1158209720Srpaulo				tp->ftt_code = COND(instr);
1159209720Srpaulo			}
1160209720Srpaulo
1161209720Srpaulo			if (A(instr) != 0)
1162209720Srpaulo				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1163209720Srpaulo
1164209720Srpaulo			disp = DISP19(instr);
1165209720Srpaulo			disp <<= 13;
1166209720Srpaulo			disp >>= 11;
1167209720Srpaulo			tp->ftt_dest = pc + (intptr_t)disp;
1168209720Srpaulo			break;
1169209720Srpaulo
1170209720Srpaulo		case OP2_Bicc:
1171209720Srpaulo			if (COND(instr) == 8) {
1172209720Srpaulo				tp->ftt_type = FASTTRAP_T_ALWAYS;
1173209720Srpaulo			} else {
1174209720Srpaulo				tp->ftt_type = FASTTRAP_T_CCR;
1175209720Srpaulo				tp->ftt_cc = 0;
1176209720Srpaulo				tp->ftt_code = COND(instr);
1177209720Srpaulo			}
1178209720Srpaulo
1179209720Srpaulo			if (A(instr) != 0)
1180209720Srpaulo				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1181209720Srpaulo
1182209720Srpaulo			disp = DISP22(instr);
1183209720Srpaulo			disp <<= 10;
1184209720Srpaulo			disp >>= 8;
1185209720Srpaulo			tp->ftt_dest = pc + (intptr_t)disp;
1186209720Srpaulo			break;
1187209720Srpaulo
1188209720Srpaulo		case OP2_BPr:
1189209720Srpaulo			/*
1190209720Srpaulo			 * Check for an illegal instruction.
1191209720Srpaulo			 */
1192209720Srpaulo			if ((RCOND(instr) & 3) == 0)
1193209720Srpaulo				return (-1);
1194209720Srpaulo
1195209720Srpaulo			/*
1196209720Srpaulo			 * It's a violation of the v8plus ABI to use a
1197209720Srpaulo			 * register-predicated branch in a 32-bit app if
1198209720Srpaulo			 * the register used is an %l or an %i (%gs and %os
1199209720Srpaulo			 * are legit because they're not saved to the stack
1200209720Srpaulo			 * in 32-bit words when we take a trap).
1201209720Srpaulo			 */
1202209720Srpaulo			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
1203209720Srpaulo				return (-1);
1204209720Srpaulo
1205209720Srpaulo			tp->ftt_type = FASTTRAP_T_REG;
1206209720Srpaulo			if (A(instr) != 0)
1207209720Srpaulo				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1208209720Srpaulo			disp = DISP16(instr);
1209209720Srpaulo			disp <<= 16;
1210209720Srpaulo			disp >>= 14;
1211209720Srpaulo			tp->ftt_dest = pc + (intptr_t)disp;
1212209720Srpaulo			tp->ftt_code = RCOND(instr);
1213209720Srpaulo			break;
1214209720Srpaulo
1215209720Srpaulo		case OP2_SETHI:
1216209720Srpaulo			tp->ftt_type = FASTTRAP_T_SETHI;
1217209720Srpaulo			break;
1218209720Srpaulo
1219209720Srpaulo		case OP2_FBPfcc:
1220209720Srpaulo			if (COND(instr) == 8) {
1221209720Srpaulo				tp->ftt_type = FASTTRAP_T_ALWAYS;
1222209720Srpaulo			} else {
1223209720Srpaulo				tp->ftt_type = FASTTRAP_T_FCC;
1224209720Srpaulo				tp->ftt_cc = CC(instr);
1225209720Srpaulo				tp->ftt_code = COND(instr);
1226209720Srpaulo			}
1227209720Srpaulo
1228209720Srpaulo			if (A(instr) != 0)
1229209720Srpaulo				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1230209720Srpaulo
1231209720Srpaulo			disp = DISP19(instr);
1232209720Srpaulo			disp <<= 13;
1233209720Srpaulo			disp >>= 11;
1234209720Srpaulo			tp->ftt_dest = pc + (intptr_t)disp;
1235209720Srpaulo			break;
1236209720Srpaulo
1237209720Srpaulo		case OP2_FBfcc:
1238209720Srpaulo			if (COND(instr) == 8) {
1239209720Srpaulo				tp->ftt_type = FASTTRAP_T_ALWAYS;
1240209720Srpaulo			} else {
1241209720Srpaulo				tp->ftt_type = FASTTRAP_T_FCC;
1242209720Srpaulo				tp->ftt_cc = 0;
1243209720Srpaulo				tp->ftt_code = COND(instr);
1244209720Srpaulo			}
1245209720Srpaulo
1246209720Srpaulo			if (A(instr) != 0)
1247209720Srpaulo				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1248209720Srpaulo
1249209720Srpaulo			disp = DISP22(instr);
1250209720Srpaulo			disp <<= 10;
1251209720Srpaulo			disp >>= 8;
1252209720Srpaulo			tp->ftt_dest = pc + (intptr_t)disp;
1253209720Srpaulo			break;
1254209720Srpaulo		}
1255209720Srpaulo
1256209720Srpaulo	} else if (OP(instr) == 2) {
1257209720Srpaulo		switch (OP3(instr)) {
1258209720Srpaulo		case OP3_RETURN:
1259209720Srpaulo			tp->ftt_type = FASTTRAP_T_RETURN;
1260209720Srpaulo			break;
1261209720Srpaulo
1262209720Srpaulo		case OP3_JMPL:
1263209720Srpaulo			tp->ftt_type = FASTTRAP_T_JMPL;
1264209720Srpaulo			break;
1265209720Srpaulo
1266209720Srpaulo		case OP3_RD:
1267209720Srpaulo			if (RS1(instr) == 5)
1268209720Srpaulo				tp->ftt_type = FASTTRAP_T_RDPC;
1269209720Srpaulo			break;
1270209720Srpaulo
1271209720Srpaulo		case OP3_SAVE:
1272209720Srpaulo			/*
1273209720Srpaulo			 * We optimize for save instructions at function
1274209720Srpaulo			 * entry; see the comment in fasttrap_pid_probe()
1275209720Srpaulo			 * (near FASTTRAP_T_SAVE) for details.
1276209720Srpaulo			 */
1277209720Srpaulo			if (fasttrap_optimize_save != 0 &&
1278209720Srpaulo			    type == DTFTP_ENTRY &&
1279209720Srpaulo			    I(instr) == 1 && RD(instr) == R_SP)
1280209720Srpaulo				tp->ftt_type = FASTTRAP_T_SAVE;
1281209720Srpaulo			break;
1282209720Srpaulo
1283209720Srpaulo		case OP3_RESTORE:
1284209720Srpaulo			/*
1285209720Srpaulo			 * We optimize restore instructions at function
1286209720Srpaulo			 * return; see the comment in fasttrap_pid_probe()
1287209720Srpaulo			 * (near FASTTRAP_T_RESTORE) for details.
1288209720Srpaulo			 *
1289209720Srpaulo			 * rd must be an %o or %g register.
1290209720Srpaulo			 */
1291209720Srpaulo			if ((RD(instr) & 0x10) == 0)
1292209720Srpaulo				tp->ftt_type = FASTTRAP_T_RESTORE;
1293209720Srpaulo			break;
1294209720Srpaulo
1295209720Srpaulo		case OP3_OR:
1296209720Srpaulo			/*
1297209720Srpaulo			 * A large proportion of instructions in the delay
1298209720Srpaulo			 * slot of retl instructions are or's so we emulate
1299209720Srpaulo			 * these downstairs as an optimization.
1300209720Srpaulo			 */
1301209720Srpaulo			tp->ftt_type = FASTTRAP_T_OR;
1302209720Srpaulo			break;
1303209720Srpaulo
1304209720Srpaulo		case OP3_TCC:
1305209720Srpaulo			/*
1306209720Srpaulo			 * Breakpoint instructions are effectively position-
1307209720Srpaulo			 * dependent since the debugger uses the %pc value
1308209720Srpaulo			 * to lookup which breakpoint was executed. As a
1309209720Srpaulo			 * result, we can't actually instrument breakpoints.
1310209720Srpaulo			 */
1311209720Srpaulo			if (SW_TRAP(instr) == ST_BREAKPOINT)
1312209720Srpaulo				return (-1);
1313209720Srpaulo			break;
1314209720Srpaulo
1315209720Srpaulo		case 0x19:
1316209720Srpaulo		case 0x1d:
1317209720Srpaulo		case 0x29:
1318209720Srpaulo		case 0x33:
1319209720Srpaulo		case 0x3f:
1320209720Srpaulo			/*
1321209720Srpaulo			 * Identify illegal instructions (See SPARC
1322209720Srpaulo			 * Architecture Manual Version 9, E.2 table 32).
1323209720Srpaulo			 */
1324209720Srpaulo			return (-1);
1325209720Srpaulo		}
1326209720Srpaulo	} else if (OP(instr) == 3) {
1327209720Srpaulo		uint32_t op3 = OP3(instr);
1328209720Srpaulo
1329209720Srpaulo		/*
1330209720Srpaulo		 * Identify illegal instructions (See SPARC Architecture
1331209720Srpaulo		 * Manual Version 9, E.2 table 33).
1332209720Srpaulo		 */
1333209720Srpaulo		if ((op3 & 0x28) == 0x28) {
1334209720Srpaulo			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
1335209720Srpaulo			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
1336209720Srpaulo				return (-1);
1337209720Srpaulo		} else {
1338209720Srpaulo			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
1339209720Srpaulo				return (-1);
1340209720Srpaulo		}
1341209720Srpaulo	}
1342209720Srpaulo
1343209720Srpaulo	tp->ftt_instr = instr;
1344209720Srpaulo
1345209720Srpaulo	/*
1346209720Srpaulo	 * We don't know how this tracepoint is going to be used, but in case
1347209720Srpaulo	 * it's used as part of a function return probe, we need to indicate
1348209720Srpaulo	 * whether it's always a return site or only potentially a return
1349209720Srpaulo	 * site. If it's part of a return probe, it's always going to be a
1350209720Srpaulo	 * return from that function if it's a restore instruction or if
1351209720Srpaulo	 * the previous instruction was a return. If we could reliably
1352209720Srpaulo	 * distinguish jump tables from return sites, this wouldn't be
1353209720Srpaulo	 * necessary.
1354209720Srpaulo	 */
1355209720Srpaulo	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
1356209720Srpaulo	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
1357209720Srpaulo	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
1358209720Srpaulo		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
1359209720Srpaulo
1360209720Srpaulo	return (0);
1361209720Srpaulo}
1362209720Srpaulo
1363209720Srpaulo/*ARGSUSED*/
1364209720Srpaulouint64_t
1365209720Srpaulofasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1366209720Srpaulo    int aframes)
1367209720Srpaulo{
1368209720Srpaulo	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1369209720Srpaulo}
1370209720Srpaulo
1371209720Srpaulo/*ARGSUSED*/
1372209720Srpaulouint64_t
1373209720Srpaulofasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1374209720Srpaulo    int aframes)
1375209720Srpaulo{
1376209720Srpaulo	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1377209720Srpaulo}
1378209720Srpaulo
1379209720Srpaulostatic uint64_t fasttrap_getreg_fast_cnt;
1380209720Srpaulostatic uint64_t fasttrap_getreg_mpcb_cnt;
1381209720Srpaulostatic uint64_t fasttrap_getreg_slow_cnt;
1382209720Srpaulo
1383209720Srpaulostatic ulong_t
1384209720Srpaulofasttrap_getreg(struct regs *rp, uint_t reg)
1385209720Srpaulo{
1386209720Srpaulo	ulong_t value;
1387209720Srpaulo	dtrace_icookie_t cookie;
1388209720Srpaulo	struct machpcb *mpcb;
1389209720Srpaulo	extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1390209720Srpaulo
1391209720Srpaulo	/*
1392209720Srpaulo	 * We have the %os and %gs in our struct regs, but if we need to
1393209720Srpaulo	 * snag a %l or %i we need to go scrounging around in the process's
1394209720Srpaulo	 * address space.
1395209720Srpaulo	 */
1396209720Srpaulo	if (reg == 0)
1397209720Srpaulo		return (0);
1398209720Srpaulo
1399209720Srpaulo	if (reg < 16)
1400209720Srpaulo		return ((&rp->r_g1)[reg - 1]);
1401209720Srpaulo
1402209720Srpaulo	/*
1403209720Srpaulo	 * Before we look at the user's stack, we'll check the register
1404209720Srpaulo	 * windows to see if the information we want is in there.
1405209720Srpaulo	 */
1406209720Srpaulo	cookie = dtrace_interrupt_disable();
1407209720Srpaulo	if (dtrace_getotherwin() > 0) {
1408209720Srpaulo		value = dtrace_getreg_win(reg, 1);
1409209720Srpaulo		dtrace_interrupt_enable(cookie);
1410209720Srpaulo
1411271001Sdelphij		atomic_inc_64(&fasttrap_getreg_fast_cnt);
1412209720Srpaulo
1413209720Srpaulo		return (value);
1414209720Srpaulo	}
1415209720Srpaulo	dtrace_interrupt_enable(cookie);
1416209720Srpaulo
1417209720Srpaulo	/*
1418209720Srpaulo	 * First check the machpcb structure to see if we've already read
1419209720Srpaulo	 * in the register window we're looking for; if we haven't, (and
1420209720Srpaulo	 * we probably haven't) try to copy in the value of the register.
1421209720Srpaulo	 */
1422209720Srpaulo	/* LINTED - alignment */
1423209720Srpaulo	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1424209720Srpaulo
1425209720Srpaulo	if (get_udatamodel() == DATAMODEL_NATIVE) {
1426209720Srpaulo		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1427209720Srpaulo
1428209720Srpaulo		if (mpcb->mpcb_wbcnt > 0) {
1429209720Srpaulo			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1430209720Srpaulo			int i = mpcb->mpcb_wbcnt;
1431209720Srpaulo			do {
1432209720Srpaulo				i--;
1433209720Srpaulo				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1434209720Srpaulo					continue;
1435209720Srpaulo
1436271001Sdelphij				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1437209720Srpaulo				return (rwin[i].rw_local[reg - 16]);
1438209720Srpaulo			} while (i > 0);
1439209720Srpaulo		}
1440209720Srpaulo
1441209720Srpaulo		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
1442209720Srpaulo			goto err;
1443209720Srpaulo	} else {
1444209720Srpaulo		struct frame32 *fr =
1445209720Srpaulo		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1446209720Srpaulo		uint32_t *v32 = (uint32_t *)&value;
1447209720Srpaulo
1448209720Srpaulo		if (mpcb->mpcb_wbcnt > 0) {
1449209720Srpaulo			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1450209720Srpaulo			int i = mpcb->mpcb_wbcnt;
1451209720Srpaulo			do {
1452209720Srpaulo				i--;
1453209720Srpaulo				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1454209720Srpaulo					continue;
1455209720Srpaulo
1456271001Sdelphij				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1457209720Srpaulo				return (rwin[i].rw_local[reg - 16]);
1458209720Srpaulo			} while (i > 0);
1459209720Srpaulo		}
1460209720Srpaulo
1461209720Srpaulo		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1462209720Srpaulo			goto err;
1463209720Srpaulo
1464209720Srpaulo		v32[0] = 0;
1465209720Srpaulo	}
1466209720Srpaulo
1467271001Sdelphij	atomic_inc_64(&fasttrap_getreg_slow_cnt);
1468209720Srpaulo	return (value);
1469209720Srpaulo
1470209720Srpauloerr:
1471209720Srpaulo	/*
1472209720Srpaulo	 * If the copy in failed, the process will be in an irrecoverable
1473209720Srpaulo	 * state, and we have no choice but to kill it.
1474209720Srpaulo	 */
1475225617Skmacy	kern_psignal(ttoproc(curthread), SIGILL);
1476209720Srpaulo	return (0);
1477209720Srpaulo}
1478209720Srpaulo
/*
 * Counters recording which path fasttrap_putreg() used to store a register:
 *   fast - the value was placed in a register window by dtrace_putreg_win()
 *   mpcb - the value was placed in a window image held in the machpcb
 *          window buffer (either an existing entry or a newly added one)
 *   slow - the value was copied out to the frame at the user's stack pointer
 */
static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;
1482209720Srpaulo
1483209720Srpaulostatic void
1484209720Srpaulofasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
1485209720Srpaulo{
1486209720Srpaulo	dtrace_icookie_t cookie;
1487209720Srpaulo	struct machpcb *mpcb;
1488209720Srpaulo	extern void dtrace_putreg_win(uint_t, ulong_t);
1489209720Srpaulo
1490209720Srpaulo	if (reg == 0)
1491209720Srpaulo		return;
1492209720Srpaulo
1493209720Srpaulo	if (reg < 16) {
1494209720Srpaulo		(&rp->r_g1)[reg - 1] = value;
1495209720Srpaulo		return;
1496209720Srpaulo	}
1497209720Srpaulo
1498209720Srpaulo	/*
1499209720Srpaulo	 * If the user process is still using some register windows, we
1500209720Srpaulo	 * can just place the value in the correct window.
1501209720Srpaulo	 */
1502209720Srpaulo	cookie = dtrace_interrupt_disable();
1503209720Srpaulo	if (dtrace_getotherwin() > 0) {
1504209720Srpaulo		dtrace_putreg_win(reg, value);
1505209720Srpaulo		dtrace_interrupt_enable(cookie);
1506271001Sdelphij		atomic_inc_64(&fasttrap_putreg_fast_cnt);
1507209720Srpaulo		return;
1508209720Srpaulo	}
1509209720Srpaulo	dtrace_interrupt_enable(cookie);
1510209720Srpaulo
1511209720Srpaulo	/*
1512209720Srpaulo	 * First see if there's a copy of the register window in the
1513209720Srpaulo	 * machpcb structure that we can modify; if there isn't try to
1514209720Srpaulo	 * copy out the value. If that fails, we try to create a new
1515209720Srpaulo	 * register window in the machpcb structure. While this isn't
1516209720Srpaulo	 * _precisely_ the intended use of the machpcb structure, it
1517209720Srpaulo	 * can't cause any problems since we know at this point in the
1518209720Srpaulo	 * code that all of the user's data have been flushed out of the
1519209720Srpaulo	 * register file (since %otherwin is 0).
1520209720Srpaulo	 */
1521209720Srpaulo	/* LINTED - alignment */
1522209720Srpaulo	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1523209720Srpaulo
1524209720Srpaulo	if (get_udatamodel() == DATAMODEL_NATIVE) {
1525209720Srpaulo		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1526209720Srpaulo		/* LINTED - alignment */
1527209720Srpaulo		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
1528209720Srpaulo
1529209720Srpaulo		if (mpcb->mpcb_wbcnt > 0) {
1530209720Srpaulo			int i = mpcb->mpcb_wbcnt;
1531209720Srpaulo			do {
1532209720Srpaulo				i--;
1533209720Srpaulo				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1534209720Srpaulo					continue;
1535209720Srpaulo
1536209720Srpaulo				rwin[i].rw_local[reg - 16] = value;
1537271001Sdelphij				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1538209720Srpaulo				return;
1539209720Srpaulo			} while (i > 0);
1540209720Srpaulo		}
1541209720Srpaulo
1542209720Srpaulo		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
1543209720Srpaulo			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1544209720Srpaulo			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1545209720Srpaulo				goto err;
1546209720Srpaulo
1547209720Srpaulo			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
1548209720Srpaulo			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1549209720Srpaulo			mpcb->mpcb_wbcnt++;
1550271001Sdelphij			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1551209720Srpaulo			return;
1552209720Srpaulo		}
1553209720Srpaulo	} else {
1554209720Srpaulo		struct frame32 *fr =
1555209720Srpaulo		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1556209720Srpaulo		/* LINTED - alignment */
1557209720Srpaulo		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
1558209720Srpaulo		uint32_t v32 = (uint32_t)value;
1559209720Srpaulo
1560209720Srpaulo		if (mpcb->mpcb_wbcnt > 0) {
1561209720Srpaulo			int i = mpcb->mpcb_wbcnt;
1562209720Srpaulo			do {
1563209720Srpaulo				i--;
1564209720Srpaulo				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1565209720Srpaulo					continue;
1566209720Srpaulo
1567209720Srpaulo				rwin[i].rw_local[reg - 16] = v32;
1568271001Sdelphij				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1569209720Srpaulo				return;
1570209720Srpaulo			} while (i > 0);
1571209720Srpaulo		}
1572209720Srpaulo
1573209720Srpaulo		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
1574209720Srpaulo			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1575209720Srpaulo			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1576209720Srpaulo				goto err;
1577209720Srpaulo
1578209720Srpaulo			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
1579209720Srpaulo			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1580209720Srpaulo			mpcb->mpcb_wbcnt++;
1581271001Sdelphij			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1582209720Srpaulo			return;
1583209720Srpaulo		}
1584209720Srpaulo	}
1585209720Srpaulo
1586271001Sdelphij	atomic_inc_64(&fasttrap_putreg_slow_cnt);
1587209720Srpaulo	return;
1588209720Srpaulo
1589209720Srpauloerr:
1590209720Srpaulo	/*
1591209720Srpaulo	 * If we couldn't record this register's value, the process is in an
1592209720Srpaulo	 * irrecoverable state and we have no choice but to euthanize it.
1593209720Srpaulo	 */
1594225617Skmacy	kern_psignal(ttoproc(curthread), SIGILL);
1595209720Srpaulo}
1596