159241Srwatson/*
2126097Srwatson * CDDL HEADER START
3126097Srwatson *
459241Srwatson * The contents of this file are subject to the terms of the
559241Srwatson * Common Development and Distribution License (the "License").
685845Srwatson * You may not use this file except in compliance with the License.
785845Srwatson *
8106394Srwatson * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9106394Srwatson * or http://www.opensolaris.org/os/licensing.
10106394Srwatson * See the License for the specific language governing permissions
11106394Srwatson * and limitations under the License.
1290452Srwatson *
1359241Srwatson * When distributing Covered Code, include this CDDL HEADER in each
1459241Srwatson * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1559241Srwatson * If applicable, add the following below this CDDL HEADER, with the
1659241Srwatson * fields enclosed by brackets "[]" replaced with your own identifying
1759241Srwatson * information: Portions Copyright [yyyy] [name of copyright owner]
1859241Srwatson *
1959241Srwatson * CDDL HEADER END
2059241Srwatson */
2159241Srwatson
2259241Srwatson/*
2359241Srwatson * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
2459241Srwatson * Use is subject to license terms.
2559241Srwatson */
2659241Srwatson
2759241Srwatson#pragma ident	"%Z%%M%	%I%	%E% SMI"
2859241Srwatson
2959241Srwatson#include <sys/fasttrap_isa.h>
3059241Srwatson#include <sys/fasttrap_impl.h>
3159241Srwatson#include <sys/dtrace.h>
3259241Srwatson#include <sys/dtrace_impl.h>
3359241Srwatson#include <sys/cmn_err.h>
3459241Srwatson#include <sys/frame.h>
35116192Sobrien#include <sys/stack.h>
3659241Srwatson#include <sys/sysmacros.h>
3796755Strhodes#include <sys/trap.h>
3859241Srwatson
3959241Srwatson#include <v9/sys/machpcb.h>
40116192Sobrien#include <v9/sys/privregs.h>
41116192Sobrien
42116192Sobrien/*
4390453Srwatson * Lossless User-Land Tracing on SPARC
4490453Srwatson * -----------------------------------
4559241Srwatson *
4659241Srwatson * The Basic Idea
4759241Srwatson *
4859241Srwatson * The most important design constraint is, of course, correct execution of
4959241Srwatson * the user thread above all else. The next most important goal is rapid
5059241Srwatson * execution. We combine execution of instructions in user-land with
51164033Srwatson * emulation of certain instructions in the kernel to aim for complete
5259241Srwatson * correctness and maximal performance.
5359241Srwatson *
 * We take advantage of the split PC/NPC architecture to speed up logical
 * single-stepping; when we copy an instruction out to the scratch space in
 * the ulwp_t structure (held in the %g7 register on SPARC), we can
 * effectively single step by setting the PC to our scratch space and leaving
 * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
 * branches, call and link instructions (call and jmpl), and the rdpc
 * instruction. These instructions cannot be executed in the manner described
 * so they must be emulated in the kernel.
6459241Srwatson *
 * Emulation for this small set of instructions is fairly simple; the most
 * difficult part is emulating branch conditions.
6759241Srwatson *
6874234Srwatson *
6959241Srwatson * A Cache Heavy Portfolio
7074433Srwatson *
7174273Srwatson * It's important to note at this time that copying an instruction out to the
7259241Srwatson * ulwp_t scratch space in user-land is rather complicated. SPARC has
7359241Srwatson * separate data and instruction caches so any writes to the D$ (using a
7475106Srwatson * store instruction for example) aren't necessarily reflected in the I$.
7575106Srwatson * The flush instruction can be used to synchronize the two and must be used
7675106Srwatson * for any self-modifying code, but the flush instruction only applies to the
7775106Srwatson * primary address space (the absence of a flusha analogue to the flush
7885577Srwatson * instruction that accepts an ASI argument is an obvious omission from SPARC
7990453Srwatson * v9 where the notion of the alternate address space was introduced on
8074273Srwatson * SPARC). To correctly copy out the instruction we must use a block store
8190453Srwatson * that doesn't allocate in the D$ and ensures synchronization with the I$;
8290453Srwatson * see dtrace_blksuword32() for the implementation  (this function uses
8374437Srwatson * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
8490453Srwatson * described). Refer to the UltraSPARC I/II manual for details on the
8590453Srwatson * ASI_BLK_COMMIT_S ASI.
8674437Srwatson *
8790453Srwatson *
8874437Srwatson * Return Subtleties
8990453Srwatson *
9090453Srwatson * When we're firing a return probe we need to expose the value returned by
9174437Srwatson * the function being traced. Since the function can set the return value
9290453Srwatson * in its last instruction, we need to fire the return probe only _after_
9390453Srwatson * the effects of the instruction are apparent. For instructions that we
9474437Srwatson * emulate, we can call dtrace_probe() after we've performed the emulation;
9590453Srwatson * for instructions that we execute after we return to user-land, we set
96191990Sattilio * %pc to the instruction we copied out (as described above) and set %npc
97186898Skib * to a trap instruction stashed in the ulwp_t structure. After the traced
98186898Skib * instruction is executed, the trap instruction returns control to the
99191990Sattilio * kernel where we can fire the return probe.
100186898Skib *
101186898Skib * This need for a second trap in cases where we execute the traced
10259241Srwatson * instruction makes it all the more important to emulate the most common
10359241Srwatson * instructions to avoid the second trip in and out of the kernel.
10470776Srwatson *
105176797Srwatson *
106176797Srwatson * Making it Fast
107176797Srwatson *
108176797Srwatson * Since copying out an instruction is neither simple nor inexpensive for the
10959241Srwatson * CPU, we should attempt to avoid doing it in as many cases as possible.
11059241Srwatson * Since function entry and return are usually the most interesting probe
111234613Strasz * sites, we attempt to tune the performance of the fasttrap provider around
11259241Srwatson * instructions typically in those places.
11359241Srwatson *
114176797Srwatson * Looking at a bunch of functions in libraries and executables reveals that
11559241Srwatson * most functions begin with either a save or a sethi (to setup a larger
11659241Srwatson * argument to the save) and end with a restore or an or (in the case of leaf
11759241Srwatson * functions). To try to improve performance, we emulate all of these
118234613Strasz * instructions in the kernel.
11959241Srwatson *
 * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 * register windows from the kernel, we emulate the implicit add that takes
 * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a return
 * probe and want to make it seem as though the tracepoint has been completely
 * executed, we need to remember that we've pulled this trick with restore and
 * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why in the case of emulating a restore we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
13265377Srwatson */
13385577Srwatson
/*
 * Field extractors for an instruction word. Each macro shifts the word
 * right to bring the field down to bit 0 and then masks off the field width.
 * OP() applies no mask, so it yields the top two bits only when x is an
 * unsigned 32-bit quantity.
 */
#define	FT_FIELD(x, lsb, mask)	(((x) >> (lsb)) & (mask))

#define	OP(x)		((x) >> 30)
#define	OP2(x)		FT_FIELD(x, 22, 0x07)
#define	OP3(x)		FT_FIELD(x, 19, 0x3f)
#define	RCOND(x)	FT_FIELD(x, 25, 0x07)
#define	COND(x)		FT_FIELD(x, 25, 0x0f)
#define	A(x)		FT_FIELD(x, 29, 0x01)
#define	I(x)		FT_FIELD(x, 13, 0x01)
#define	RD(x)		FT_FIELD(x, 25, 0x1f)
#define	RS1(x)		FT_FIELD(x, 14, 0x1f)
#define	RS2(x)		FT_FIELD(x, 0, 0x1f)
#define	CC(x)		FT_FIELD(x, 20, 0x03)

/*
 * Displacement fields. DISP16 is stored in two pieces: the upper two bits
 * sit at bits 21:20 of the word and the lower fourteen at bits 13:0.
 */
#define	DISP16(x)	(FT_FIELD(x, 6, 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)

/* Low seven bits of the word: the software trap number. */
#define	SW_TRAP(x)	((x) & 0x7f)
15059241Srwatson
/*
 * Values compared against the OP3() field of an instruction.
 */
#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

/*
 * A second set of OP3() values. These overlap the set above (OP3_CASA has
 * the same value as OP3_SAVE, OP3_PREFETCHA the same as OP3_RESTORE), so the
 * two sets have to be told apart by another field of the instruction --
 * presumably OP(); confirm against the SPARC V9 manual.
 */
#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

/*
 * Values compared against the OP2() field of an instruction.
 */
#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

/*
 * Register numbers as they appear in the RD()/RS1()/RS2() fields. The %i
 * registers sit sixteen above the corresponding %o registers; the code in
 * this file relies on that when it converts an %o number to an %i number.
 */
#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		(R_O0 + 16)
#define	R_I1		(R_I0 + 1)
#define	R_I2		(R_I0 + 2)
#define	R_I3		(R_I0 + 3)
#define	R_I4		(R_I0 + 4)

/*
 * Byte offsets from %g7 of the instructions stashed for the thread.
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */

/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * A program that is not ABI compliant could be broken by the optimization,
 * so it can be disabled by setting this to zero.
 */
int fasttrap_optimize_save = 1;
19866616Srwatson
19966616Srwatsonstatic uint64_t
20066616Srwatsonfasttrap_anarg(struct regs *rp, int argno)
20166616Srwatson{
202176797Srwatson	uint64_t value;
20366616Srwatson
20466616Srwatson	if (argno < 6)
20566616Srwatson		return ((&rp->r_o0)[argno]);
20670776Srwatson
20759241Srwatson	if (curproc->p_model == DATAMODEL_NATIVE) {
20859241Srwatson		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
20983366Sjulian
21059241Srwatson		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
211131066Srwatson		value = dtrace_fulword(&fr->fr_argd[argno]);
212131066Srwatson		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
21359241Srwatson		    CPU_DTRACE_BADALIGN);
21459241Srwatson	} else {
21559241Srwatson		struct frame32 *fr = (struct frame32 *)rp->r_sp;
216234613Strasz
217186898Skib		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
218234613Strasz		value = dtrace_fuword32(&fr->fr_argd[argno]);
219186898Skib		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
220186898Skib		    CPU_DTRACE_BADALIGN);
22159241Srwatson	}
222186898Skib
223186898Skib	return (value);
224186898Skib}
225186898Skib
226186898Skibstatic ulong_t fasttrap_getreg(struct regs *, uint_t);
227186898Skibstatic void fasttrap_putreg(struct regs *, uint_t, ulong_t);
228186898Skib
22959241Srwatsonstatic void
23059241Srwatsonfasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
23191406Sjhb    uint_t fake_restore, int argc, uintptr_t *argv)
232186898Skib{
23359241Srwatson	int i, x, cap = MIN(argc, probe->ftp_nargs);
23459241Srwatson	int inc = (fake_restore ? 16 : 0);
23574433Srwatson
23659241Srwatson	/*
23774234Srwatson	 * The only way we'll hit the fake_restore case is if a USDT probe is
23874234Srwatson	 * invoked as a tail-call. While it wouldn't be incorrect, we can
23974234Srwatson	 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
24074234Srwatson	 * directly since a tail-call can't be made if the invoked function
24174234Srwatson	 * would use the argument dump space (i.e. if there were more than
24274234Srwatson	 * 6 arguments). We take this shortcut because unconditionally rooting
24374234Srwatson	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
24474234Srwatson	 */
24574234Srwatson
24674234Srwatson	if (curproc->p_model == DATAMODEL_NATIVE) {
24774234Srwatson		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
24883366Sjulian		uintptr_t v;
24974234Srwatson
25074234Srwatson		for (i = 0; i < cap; i++) {
25174234Srwatson			x = probe->ftp_argmap[i];
25274234Srwatson
25374234Srwatson			if (x < 6)
25474234Srwatson				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
25574234Srwatson			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
25674234Srwatson				argv[i] = 0;
25774234Srwatson		}
25874234Srwatson
25974234Srwatson	} else {
260151258Srwatson		struct frame32 *fr = (struct frame32 *)rp->r_sp;
26183366Sjulian		uint32_t v;
26291406Sjhb
263111119Simp		for (i = 0; i < cap; i++) {
26474234Srwatson			x = probe->ftp_argmap[i];
26574234Srwatson
26674234Srwatson			if (x < 6)
26774234Srwatson				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
26874234Srwatson			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
269175294Sattilio				argv[i] = 0;
27074234Srwatson		}
27192768Sjeff	}
27274234Srwatson
27374234Srwatson	for (; i < argc; i++) {
27474234Srwatson		argv[i] = 0;
27574234Srwatson	}
276138814Simp}
27774234Srwatson
27874234Srwatsonstatic void
27974234Srwatsonfasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
28074234Srwatson    uint_t fake_restore)
28192768Sjeff{
28274234Srwatson	fasttrap_tracepoint_t *tp;
28374234Srwatson	fasttrap_bucket_t *bucket;
28474234Srwatson	fasttrap_id_t *id;
28574234Srwatson	kmutex_t *pid_mtx;
28674234Srwatson	dtrace_icookie_t cookie;
287144209Sjeff
288175294Sattilio	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
28974234Srwatson	mutex_enter(pid_mtx);
29074234Srwatson	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
29174234Srwatson
29274234Srwatson	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
29374234Srwatson		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
294144209Sjeff		    tp->ftt_proc->ftpc_acount != 0)
295144209Sjeff			break;
29674234Srwatson	}
29774234Srwatson
29874234Srwatson	/*
29974234Srwatson	 * Don't sweat it if we can't find the tracepoint again; unlike
30074234Srwatson	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
30174234Srwatson	 * is not essential to the correct execution of the process.
30274234Srwatson	 */
30374234Srwatson	if (tp == NULL || tp->ftt_retids == NULL) {
304144209Sjeff		mutex_exit(pid_mtx);
305175294Sattilio		return;
30674234Srwatson	}
307144209Sjeff
308144209Sjeff	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
30974234Srwatson		fasttrap_probe_t *probe = id->fti_probe;
31074234Srwatson
31174234Srwatson		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
31274234Srwatson			if (probe->ftp_argmap != NULL && fake_restore) {
31374234Srwatson				uintptr_t t[5];
31474433Srwatson
31574234Srwatson				fasttrap_usdt_args(probe, rp, fake_restore,
31674234Srwatson				    sizeof (t) / sizeof (t[0]), t);
31796755Strhodes
31874273Srwatson				cookie = dtrace_interrupt_disable();
31985580Srwatson				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
32085580Srwatson				dtrace_probe(probe->ftp_id, t[0], t[1],
32185580Srwatson				    t[2], t[3], t[4]);
32274234Srwatson				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
32374234Srwatson				dtrace_interrupt_enable(cookie);
32474234Srwatson
32583366Sjulian			} else if (probe->ftp_argmap != NULL) {
32674234Srwatson				uintptr_t t[5];
32774234Srwatson
32874234Srwatson				fasttrap_usdt_args(probe, rp, fake_restore,
329170183Skib				    sizeof (t) / sizeof (t[0]), t);
33074234Srwatson
33174234Srwatson				dtrace_probe(probe->ftp_id, t[0], t[1],
33274234Srwatson				    t[2], t[3], t[4]);
333175294Sattilio
33474234Srwatson			} else if (fake_restore) {
33574234Srwatson				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
33674234Srwatson				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
337242476Skib				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
338232701Sjhb				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
339232701Sjhb				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
34074234Srwatson
34174234Srwatson				cookie = dtrace_interrupt_disable();
34274234Srwatson				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
343175294Sattilio				dtrace_probe(probe->ftp_id, arg0, arg1,
34474234Srwatson				    arg2, arg3, arg4);
34583366Sjulian				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
34677847Stmm				dtrace_interrupt_enable(cookie);
34791406Sjhb
34877847Stmm			} else {
34974234Srwatson				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
35074234Srwatson				    rp->r_o2, rp->r_o3, rp->r_o4);
35174433Srwatson			}
35274234Srwatson
35374234Srwatson			continue;
35474234Srwatson		}
35574234Srwatson
35674234Srwatson		/*
35774234Srwatson		 * If this is only a possible return point, we must
35874234Srwatson		 * be looking at a potential tail call in leaf context.
35974234Srwatson		 * If the %npc is still within this function, then we
36074234Srwatson		 * must have misidentified a jmpl as a tail-call when it
36183366Sjulian		 * is, in fact, part of a jump table. It would be nice to
36274234Srwatson		 * remove this tracepoint, but this is neither the time
36374234Srwatson		 * nor the place.
36474234Srwatson		 */
36574234Srwatson		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
36674234Srwatson		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
36774234Srwatson			continue;
36874234Srwatson
36974234Srwatson		/*
37074234Srwatson		 * It's possible for a function to branch to the delay slot
37174234Srwatson		 * of an instruction that we've identified as a return site.
37274234Srwatson		 * We can dectect this spurious return probe activation by
37374234Srwatson		 * observing that in this case %npc will be %pc + 4 and %npc
374184205Sdes		 * will be inside the current function (unless the user is
37574234Srwatson		 * doing _crazy_ instruction picking in which case there's
37674234Srwatson		 * very little we can do). The second check is important
37774234Srwatson		 * in case the last instructions of a function make a tail-
37874234Srwatson		 * call to the function located immediately subsequent.
37974234Srwatson		 */
38083366Sjulian		if (rp->r_npc == rp->r_pc + 4 &&
38174234Srwatson		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
38274234Srwatson			continue;
383138814Simp
38474234Srwatson		/*
38574234Srwatson		 * The first argument is the offset of return tracepoint
38691406Sjhb		 * in the function; the remaining arguments are the return
38774234Srwatson		 * values.
38874234Srwatson		 *
38974234Srwatson		 * If fake_restore is set, we need to pull the return values
39074234Srwatson		 * out of the %i's rather than the %o's -- a little trickier.
39174234Srwatson		 */
39274234Srwatson		if (!fake_restore) {
39374234Srwatson			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
39474234Srwatson			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
39574234Srwatson		} else {
39674234Srwatson			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
39774234Srwatson			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
39874234Srwatson			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
39974234Srwatson			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
40074234Srwatson
40174234Srwatson			cookie = dtrace_interrupt_disable();
402252438Sgleb			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
40374234Srwatson			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
40474234Srwatson			    arg0, arg1, arg2, arg3);
40574234Srwatson			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
40674234Srwatson			dtrace_interrupt_enable(cookie);
40783366Sjulian		}
40874234Srwatson	}
40974234Srwatson
41074234Srwatson	mutex_exit(pid_mtx);
41174234Srwatson}
41274234Srwatson
41374234Srwatsonint
41474234Srwatsonfasttrap_pid_probe(struct regs *rp)
41574234Srwatson{
41674234Srwatson	proc_t *p = curproc;
41783366Sjulian	fasttrap_tracepoint_t *tp, tp_local;
41874273Srwatson	fasttrap_id_t *id;
41974234Srwatson	pid_t pid;
42074234Srwatson	uintptr_t pc = rp->r_pc;
42174234Srwatson	uintptr_t npc = rp->r_npc;
42274234Srwatson	uintptr_t orig_pc = pc;
42385580Srwatson	fasttrap_bucket_t *bucket;
42485580Srwatson	kmutex_t *pid_mtx;
42585580Srwatson	uint_t fake_restore = 0, is_enabled = 0;
42674234Srwatson	dtrace_icookie_t cookie;
42774234Srwatson
42874234Srwatson	/*
42974234Srwatson	 * It's possible that a user (in a veritable orgy of bad planning)
43074234Srwatson	 * could redirect this thread's flow of control before it reached the
43174234Srwatson	 * return probe fasttrap. In this case we need to kill the process
43274234Srwatson	 * since it's in a unrecoverable state.
433184205Sdes	 */
43474234Srwatson	if (curthread->t_dtrace_step) {
43574234Srwatson		ASSERT(curthread->t_dtrace_on);
43674234Srwatson		fasttrap_sigtrap(p, curthread, pc);
43774234Srwatson		return (0);
43874234Srwatson	}
43974234Srwatson
44074234Srwatson	/*
44174234Srwatson	 * Clear all user tracing flags.
44274234Srwatson	 */
44383366Sjulian	curthread->t_dtrace_ft = 0;
44474234Srwatson	curthread->t_dtrace_pc = 0;
445186898Skib	curthread->t_dtrace_npc = 0;
446186898Skib	curthread->t_dtrace_scrpc = 0;
447186898Skib	curthread->t_dtrace_astpc = 0;
448186898Skib
449234613Strasz	/*
450186898Skib	 * Treat a child created by a call to vfork(2) as if it were its
451234613Strasz	 * parent. We know that there's only one thread of control in such a
452186898Skib	 * process: this one.
453186898Skib	 */
454186898Skib	while (p->p_flag & SVFORK) {
455186898Skib		p = p->p_parent;
456186898Skib	}
457186898Skib
45874404Srwatson	pid = p->p_pid;
459176752Srwatson	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
46074234Srwatson	mutex_enter(pid_mtx);
46174234Srwatson	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
46274234Srwatson
463176752Srwatson	/*
464176752Srwatson	 * Lookup the tracepoint that the process just hit.
465176752Srwatson	 */
466176752Srwatson	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
467176752Srwatson		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
468176752Srwatson		    tp->ftt_proc->ftpc_acount != 0)
469176752Srwatson			break;
47096755Strhodes	}
47174273Srwatson
47274234Srwatson	/*
473191990Sattilio	 * If we couldn't find a matching tracepoint, either a tracepoint has
47474234Srwatson	 * been inserted without using the pid<pid> ioctl interface (see
47585578Srwatson	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
47685578Srwatson	 */
47774234Srwatson	if (tp == NULL) {
47874234Srwatson		mutex_exit(pid_mtx);
47974234Srwatson		return (-1);
48074234Srwatson	}
48183366Sjulian
48274234Srwatson	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
48374234Srwatson		fasttrap_probe_t *probe = id->fti_probe;
48474234Srwatson		int isentry = (id->fti_ptype == DTFTP_ENTRY);
48574234Srwatson
48674234Srwatson		if (id->fti_ptype == DTFTP_IS_ENABLED) {
48774234Srwatson			is_enabled = 1;
48874234Srwatson			continue;
489155160Sjeff		}
49074234Srwatson
49174234Srwatson		/*
49274234Srwatson		 * We note that this was an entry probe to help ustack() find
49374234Srwatson		 * the first caller.
49474234Srwatson		 */
49574234Srwatson		if (isentry) {
49674273Srwatson			cookie = dtrace_interrupt_disable();
49774273Srwatson			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
49874404Srwatson		}
49974234Srwatson		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
50074234Srwatson		    rp->r_o3, rp->r_o4);
501186898Skib		if (isentry) {
50274234Srwatson			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
50374234Srwatson			dtrace_interrupt_enable(cookie);
50474234Srwatson		}
50574404Srwatson	}
50674234Srwatson
50774234Srwatson	/*
50874234Srwatson	 * We're about to do a bunch of work so we cache a local copy of
50974404Srwatson	 * the tracepoint to emulate the instruction, and then find the
51074404Srwatson	 * tracepoint again later if we need to light up any return probes.
51174404Srwatson	 */
51274404Srwatson	tp_local = *tp;
51374404Srwatson	mutex_exit(pid_mtx);
51474234Srwatson	tp = &tp_local;
51574404Srwatson
51683366Sjulian	/*
51774404Srwatson	 * If there's an is-enabled probe conntected to this tracepoint it
51874404Srwatson	 * means that there was a 'mov %g0, %o0' instruction that was placed
51983366Sjulian	 * there by DTrace when the binary was linked. As this probe is, in
52074404Srwatson	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
52174404Srwatson	 * bypass all the instruction emulation logic since we know the
52274404Srwatson	 * inevitable result. It's possible that a user could construct a
52374404Srwatson	 * scenario where the 'is-enabled' probe was on some other
52474404Srwatson	 * instruction, but that would be a rather exotic way to shoot oneself
52574234Srwatson	 * in the foot.
52674404Srwatson	 */
52783366Sjulian	if (is_enabled) {
52874404Srwatson		rp->r_o0 = 1;
52974404Srwatson		pc = rp->r_npc;
53083366Sjulian		npc = pc + 4;
53174404Srwatson		goto done;
53274404Srwatson	}
53374404Srwatson
53474404Srwatson	/*
53574404Srwatson	 * We emulate certain types of instructions to ensure correctness
53674404Srwatson	 * (in the case of position dependent instructions) or optimize
53774404Srwatson	 * common cases. The rest we have the thread execute back in user-
53874234Srwatson	 * land.
53974234Srwatson	 */
54074404Srwatson	switch (tp->ftt_type) {
54174234Srwatson	case FASTTRAP_T_SAVE:
54274234Srwatson	{
54374234Srwatson		int32_t imm;
54474234Srwatson
54574433Srwatson		/*
54674234Srwatson		 * This an optimization to let us handle function entry
54774234Srwatson		 * probes more efficiently. Many functions begin with a save
54870776Srwatson		 * instruction that follows the pattern:
54959241Srwatson		 *	save	%sp, <imm>, %sp
55059241Srwatson		 *
55183366Sjulian		 * Meanwhile, we've stashed the instruction:
55259241Srwatson		 *	save	%g1, %g0, %sp
553131066Srwatson		 *
554131066Srwatson		 * off of %g7, so all we have to do is stick the right value
555131066Srwatson		 * into %g1 and reset %pc to point to the instruction we've
55659241Srwatson		 * cleverly hidden (%npc should not be touched).
557234613Strasz		 */
55859241Srwatson
55959241Srwatson		imm = tp->ftt_instr << 19;
56059241Srwatson		imm >>= 19;
56159241Srwatson		rp->r_g1 = rp->r_sp + imm;
56259241Srwatson		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
56359241Srwatson		break;
564152163Sdelphij	}
56574437Srwatson
56683366Sjulian	case FASTTRAP_T_RESTORE:
56770776Srwatson	{
56859241Srwatson		ulong_t value;
56959241Srwatson		uint_t rd;
57059241Srwatson
57159241Srwatson		/*
57259241Srwatson		 * This is an optimization to let us handle function
57359241Srwatson		 * return probes more efficiently. Most non-leaf functions
57459241Srwatson		 * end with the sequence:
575234613Strasz		 *	ret
57659241Srwatson		 *	restore	<reg>, <reg_or_imm>, %oX
57759241Srwatson		 *
57859241Srwatson		 * We've stashed the instruction:
57959241Srwatson		 *	restore	%g0, %g0, %g0
58059241Srwatson		 *
58196755Strhodes		 * off of %g7 so we just need to place the correct value
58274273Srwatson		 * in the right %i register (since after our fake-o
58359241Srwatson		 * restore, the %i's will become the %o's) and set the %pc
58459241Srwatson		 * to point to our hidden restore. We also set fake_restore to
58574437Srwatson		 * let fasttrap_return_common() know that it will find the
58683366Sjulian		 * return values in the %i's rather than the %o's.
58759241Srwatson		 */
588131066Srwatson
589131066Srwatson		if (I(tp->ftt_instr)) {
590131066Srwatson			int32_t imm;
591131066Srwatson
59259241Srwatson			imm = tp->ftt_instr << 19;
59385577Srwatson			imm >>= 19;
59465377Srwatson			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
59559241Srwatson		} else {
59659241Srwatson			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
59759241Srwatson			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
598184214Sdes		}
599184214Sdes
60059241Srwatson		/*
60159241Srwatson		 * Convert %o's to %i's; leave %g's as they are.
60259241Srwatson		 */
60359241Srwatson		rd = RD(tp->ftt_instr);
60459241Srwatson		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
60559241Srwatson
60659241Srwatson		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
60759241Srwatson		fake_restore = 1;
60874437Srwatson		break;
60965768Srwatson	}
61059241Srwatson
61159241Srwatson	case FASTTRAP_T_RETURN:
61259241Srwatson	{
61385578Srwatson		uintptr_t target;
61485578Srwatson
61574437Srwatson		/*
61659241Srwatson		 * A return instruction is like a jmpl (without the link
61759241Srwatson		 * part) that executes an implicit restore. We've stashed
61859241Srwatson		 * the instruction:
61959241Srwatson		 *	return %o0
62059241Srwatson		 *
62159241Srwatson		 * off of %g7 so we just need to place the target in %o0
62259241Srwatson		 * and set the %pc to point to the stashed return instruction.
62359241Srwatson		 * We use %o0 since that register disappears after the return
62459241Srwatson		 * executes, erasing any evidence of this tampering.
62559241Srwatson		 */
62659241Srwatson		if (I(tp->ftt_instr)) {
62759241Srwatson			int32_t imm;
62859241Srwatson
62983366Sjulian			imm = tp->ftt_instr << 19;
63059241Srwatson			imm >>= 19;
631175202Sattilio			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
63265768Srwatson		} else {
63365768Srwatson			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
63459241Srwatson			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
63574273Srwatson		}
636115040Srwatson
63759241Srwatson		fasttrap_putreg(rp, R_O0, target);
63859241Srwatson
63959241Srwatson		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
64059241Srwatson		fake_restore = 1;
641115040Srwatson		break;
64259241Srwatson	}
64359241Srwatson
64459400Srwatson	case FASTTRAP_T_OR:
64559400Srwatson	{
64659400Srwatson		ulong_t value;
647115040Srwatson
64859400Srwatson		if (I(tp->ftt_instr)) {
64959400Srwatson			int32_t imm;
65059400Srwatson
65159400Srwatson			imm = tp->ftt_instr << 19;
65259400Srwatson			imm >>= 19;
65359400Srwatson			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
654115040Srwatson		} else {
65559400Srwatson			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
65659400Srwatson			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
657101308Sjeff		}
65885578Srwatson
65985578Srwatson		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
66059241Srwatson		pc = rp->r_npc;
661175294Sattilio		npc = pc + 4;
66259241Srwatson		break;
66359241Srwatson	}
664115040Srwatson
665175294Sattilio	case FASTTRAP_T_SETHI:
666115040Srwatson		if (RD(tp->ftt_instr) != R_G0) {
66759241Srwatson			uint32_t imm32 = tp->ftt_instr << 10;
668184205Sdes			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
66959241Srwatson		}
67059241Srwatson		pc = rp->r_npc;
67159241Srwatson		npc = pc + 4;
67259241Srwatson		break;
67370776Srwatson
67459241Srwatson	case FASTTRAP_T_CCR:
67559241Srwatson	{
67674437Srwatson		uint_t c, v, z, n, taken;
67783366Sjulian		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
67859241Srwatson
679131066Srwatson		if (tp->ftt_cc != 0)
680131066Srwatson			ccr >>= 4;
68159241Srwatson
68285577Srwatson		c = (ccr >> 0) & 1;
68365377Srwatson		v = (ccr >> 1) & 1;
68465377Srwatson		z = (ccr >> 2) & 1;
68574437Srwatson		n = (ccr >> 3) & 1;
68659241Srwatson
68791814Sgreen		switch (tp->ftt_code) {
68859241Srwatson		case 0x0:	/* BN */
68959241Srwatson			taken = 0;		break;
69059241Srwatson		case 0x1:	/* BE */
691175202Sattilio			taken = z;		break;
692101308Sjeff		case 0x2:	/* BLE */
693175294Sattilio			taken = z | (n ^ v);	break;
69485578Srwatson		case 0x3:	/* BL */
69591406Sjhb			taken = n ^ v;		break;
69659241Srwatson		case 0x4:	/* BLEU */
697184205Sdes			taken = c | z;		break;
69859241Srwatson		case 0x5:	/* BCS (BLU) */
69959241Srwatson			taken = c;		break;
70059241Srwatson		case 0x6:	/* BNEG */
70159241Srwatson			taken = n;		break;
70259241Srwatson		case 0x7:	/* BVS */
70374273Srwatson			taken = v;		break;
70474273Srwatson		case 0x8:	/* BA */
70574273Srwatson			/*
70659241Srwatson			 * We handle the BA case differently since the annul
70759241Srwatson			 * bit means something slightly different.
70874273Srwatson			 */
709191990Sattilio			panic("fasttrap: mishandled a branch");
71059241Srwatson			taken = 1;		break;
711131066Srwatson		case 0x9:	/* BNE */
712191990Sattilio			taken = ~z;		break;
713131066Srwatson		case 0xa:	/* BG */
71459241Srwatson			taken = ~(z | (n ^ v));	break;
71566041Srwatson		case 0xb:	/* BGE */
71666041Srwatson			taken = ~(n ^ v);	break;
71766041Srwatson		case 0xc:	/* BGU */
71866041Srwatson			taken = ~(c | z);	break;
719164033Srwatson		case 0xd:	/* BCC (BGEU) */
720164033Srwatson			taken = ~c;		break;
72174273Srwatson		case 0xe:	/* BPOS */
722175294Sattilio			taken = ~n;		break;
72359241Srwatson		case 0xf:	/* BVC */
72474273Srwatson			taken = ~v;		break;
72559241Srwatson		}
726176752Srwatson
727176752Srwatson		if (taken & 1) {
728176752Srwatson			pc = rp->r_npc;
729176752Srwatson			npc = tp->ftt_dest;
730176752Srwatson		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
731176752Srwatson			/*
732176752Srwatson			 * Untaken annulled branches don't execute the
733176752Srwatson			 * instruction in the delay slot.
734176752Srwatson			 */
735176752Srwatson			pc = rp->r_npc + 4;
73659241Srwatson			npc = pc + 4;
73759241Srwatson		} else {
73874273Srwatson			pc = rp->r_npc;
739175294Sattilio			npc = pc + 4;
74074273Srwatson		}
74174273Srwatson		break;
74274273Srwatson	}
74374273Srwatson
74474273Srwatson	case FASTTRAP_T_FCC:
74583366Sjulian	{
74659241Srwatson		uint_t fcc;
74759241Srwatson		uint_t taken;
74859241Srwatson		uint64_t fsr;
74959241Srwatson
75074273Srwatson		dtrace_getfsr(&fsr);
751175294Sattilio
75274273Srwatson		if (tp->ftt_cc == 0) {
75374273Srwatson			fcc = (fsr >> 10) & 0x3;
75474273Srwatson		} else {
75574273Srwatson			uint_t shift;
75659241Srwatson			ASSERT(tp->ftt_cc <= 3);
75783366Sjulian			shift = 30 + tp->ftt_cc * 2;
75874273Srwatson			fcc = (fsr >> shift) & 0x3;
75974273Srwatson		}
76074273Srwatson
76159241Srwatson		switch (tp->ftt_code) {
76259241Srwatson		case 0x0:	/* FBN */
76374273Srwatson			taken = (1 << fcc) & (0|0|0|0);	break;
76474273Srwatson		case 0x1:	/* FBNE */
76574273Srwatson			taken = (1 << fcc) & (8|4|2|0);	break;
766175294Sattilio		case 0x2:	/* FBLG */
76774273Srwatson			taken = (1 << fcc) & (0|4|2|0);	break;
76874273Srwatson		case 0x3:	/* FBUL */
76959241Srwatson			taken = (1 << fcc) & (8|0|2|0);	break;
77074273Srwatson		case 0x4:	/* FBL */
77174273Srwatson			taken = (1 << fcc) & (0|0|2|0);	break;
77274273Srwatson		case 0x5:	/* FBUG */
77374273Srwatson			taken = (1 << fcc) & (8|4|0|0);	break;
774234613Strasz		case 0x6:	/* FBG */
77574273Srwatson			taken = (1 << fcc) & (0|4|0|0);	break;
77683366Sjulian		case 0x7:	/* FBU */
777234613Strasz			taken = (1 << fcc) & (8|0|0|0);	break;
77859241Srwatson		case 0x8:	/* FBA */
77959241Srwatson			/*
78059241Srwatson			 * We handle the FBA case differently since the annul
78159241Srwatson			 * bit means something slightly different.
78259241Srwatson			 */
78374273Srwatson			panic("fasttrap: mishandled a branch");
784175294Sattilio			taken = (1 << fcc) & (8|4|2|1);	break;
78574273Srwatson		case 0x9:	/* FBE */
78674273Srwatson			taken = (1 << fcc) & (0|0|0|1);	break;
78774273Srwatson		case 0xa:	/* FBUE */
78874273Srwatson			taken = (1 << fcc) & (8|0|0|1);	break;
78974273Srwatson		case 0xb:	/* FBGE */
790234613Strasz			taken = (1 << fcc) & (0|4|0|1);	break;
79185578Srwatson		case 0xc:	/* FBUGE */
79285578Srwatson			taken = (1 << fcc) & (8|4|0|1);	break;
793234613Strasz		case 0xd:	/* FBLE */
79459241Srwatson			taken = (1 << fcc) & (0|0|2|1);	break;
79559241Srwatson		case 0xe:	/* FBULE */
79659241Srwatson			taken = (1 << fcc) & (8|0|2|1);	break;
79759241Srwatson		case 0xf:	/* FBO */
79859241Srwatson			taken = (1 << fcc) & (0|4|2|1);	break;
79959241Srwatson		}
80059241Srwatson
80159241Srwatson		if (taken) {
80259241Srwatson			pc = rp->r_npc;
80370776Srwatson			npc = tp->ftt_dest;
80459241Srwatson		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
80559241Srwatson			/*
80695974Sphk			 * Untaken annulled branches don't execute the
80759241Srwatson			 * instruction in the delay slot.
80859241Srwatson			 */
80970776Srwatson			pc = rp->r_npc + 4;
81074437Srwatson			npc = pc + 4;
81170776Srwatson		} else {
81270776Srwatson			pc = rp->r_npc;
813104346Sdd			npc = pc + 4;
81470776Srwatson		}
81583366Sjulian		break;
81659241Srwatson	}
81759241Srwatson
81859241Srwatson	case FASTTRAP_T_REG:
819131066Srwatson	{
820131066Srwatson		int64_t value;
821131066Srwatson		uint_t taken;
82259241Srwatson		uint_t reg = RS1(tp->ftt_instr);
823234613Strasz
82459241Srwatson		/*
82574437Srwatson		 * An ILP32 process shouldn't be using a branch predicated on
82690448Srwatson		 * an %i or an %l since it would violate the ABI. It's a
82759241Srwatson		 * violation of the ABI because we can't ensure deterministic
828234613Strasz		 * behavior. We should have identified this case when we
82959241Srwatson		 * enabled the probe.
83059241Srwatson		 */
83159241Srwatson		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
83259241Srwatson
83359241Srwatson		value = (int64_t)fasttrap_getreg(rp, reg);
83459241Srwatson
83559241Srwatson		switch (tp->ftt_code) {
83659241Srwatson		case 0x1:	/* BRZ */
83759241Srwatson			taken = (value == 0);	break;
83874437Srwatson		case 0x2:	/* BRLEZ */
83990448Srwatson			taken = (value <= 0);	break;
84059241Srwatson		case 0x3:	/* BRLZ */
841131066Srwatson			taken = (value < 0);	break;
842131066Srwatson		case 0x5:	/* BRNZ */
843131066Srwatson			taken = (value != 0);	break;
844131066Srwatson		case 0x6:	/* BRGZ */
845131066Srwatson			taken = (value > 0);	break;
846131066Srwatson		case 0x7:	/* BRGEZ */
847131066Srwatson			taken = (value >= 0);	break;
848131066Srwatson		default:
849131066Srwatson		case 0x0:
850131066Srwatson		case 0x4:
85159241Srwatson			panic("fasttrap: mishandled a branch");
85259241Srwatson		}
85359241Srwatson
85459241Srwatson		if (taken) {
855115865Srwatson			pc = rp->r_npc;
85665377Srwatson			npc = tp->ftt_dest;
85765377Srwatson		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
858182721Strasz			/*
859102985Sphk			 * Untaken annulled branches don't execute the
860102985Sphk			 * instruction in the delay slot.
861102985Sphk			 */
86274437Srwatson			pc = rp->r_npc + 4;
86359241Srwatson			npc = pc + 4;
86491814Sgreen		} else {
86559241Srwatson			pc = rp->r_npc;
86659241Srwatson			npc = pc + 4;
86759913Srwatson		}
86859913Srwatson		break;
86970776Srwatson	}
87059241Srwatson
87190448Srwatson	case FASTTRAP_T_ALWAYS:
87259241Srwatson		/*
87359241Srwatson		 * BAs, BA,As...
87459241Srwatson		 */
87559241Srwatson
87670776Srwatson		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
87759241Srwatson			/*
87859241Srwatson			 * Annulled branch always instructions never execute
87959241Srwatson			 * the instruction in the delay slot.
88059241Srwatson			 */
88159241Srwatson			pc = tp->ftt_dest;
88259241Srwatson			npc = tp->ftt_dest + 4;
88359241Srwatson		} else {
88459241Srwatson			pc = rp->r_npc;
88559241Srwatson			npc = tp->ftt_dest;
88659241Srwatson		}
88759241Srwatson		break;
88859241Srwatson
88959241Srwatson	case FASTTRAP_T_RDPC:
89059241Srwatson		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
89159241Srwatson		pc = rp->r_npc;
89259241Srwatson		npc = pc + 4;
89383366Sjulian		break;
89459241Srwatson
89559241Srwatson	case FASTTRAP_T_CALL:
89659241Srwatson		/*
89765768Srwatson		 * It's a call _and_ link remember...
89865768Srwatson		 */
899141523Srwatson		rp->r_o7 = rp->r_pc;
90065768Srwatson		pc = rp->r_npc;
90165768Srwatson		npc = tp->ftt_dest;
90265768Srwatson		break;
90365768Srwatson
904175202Sattilio	case FASTTRAP_T_JMPL:
90559241Srwatson		pc = rp->r_npc;
90665768Srwatson
90765768Srwatson		if (I(tp->ftt_instr)) {
90859241Srwatson			uint_t rs1 = RS1(tp->ftt_instr);
90959241Srwatson			int32_t imm;
91059241Srwatson
91170776Srwatson			imm = tp->ftt_instr << 19;
91259241Srwatson			imm >>= 19;
91391814Sgreen			npc = fasttrap_getreg(rp, rs1) + imm;
91459241Srwatson		} else {
91559241Srwatson			uint_t rs1 = RS1(tp->ftt_instr);
91659241Srwatson			uint_t rs2 = RS2(tp->ftt_instr);
91770776Srwatson
91859388Srwatson			npc = fasttrap_getreg(rp, rs1) +
91959388Srwatson			    fasttrap_getreg(rp, rs2);
92059388Srwatson		}
92159388Srwatson
92259388Srwatson		/*
92359388Srwatson		 * Do the link part of the jump-and-link instruction.
92459388Srwatson		 */
925252437Spfg		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
926252435Spfg
92791814Sgreen		break;
92859388Srwatson
92959388Srwatson	case FASTTRAP_T_COMMON:
93059388Srwatson	{
93170776Srwatson		curthread->t_dtrace_scrpc = rp->r_g7;
93259241Srwatson		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
93359241Srwatson
93459241Srwatson		/*
93559241Srwatson		 * Copy the instruction to a reserved location in the
93659241Srwatson		 * user-land thread structure, then set the PC to that
93790448Srwatson		 * location and leave the NPC alone. We take pains to ensure
93890448Srwatson		 * consistency in the instruction stream (See SPARC
93990448Srwatson		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
94059241Srwatson		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
94190448Srwatson		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
94290448Srwatson		 * instruction into the user's address space without
94390448Srwatson		 * bypassing the I$. There's no AS_USER version of this ASI
94490448Srwatson		 * (as exist for other ASIs) so we use the lofault
94590448Srwatson		 * mechanism to catch faults.
94659241Srwatson		 */
94790448Srwatson		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
94890448Srwatson			/*
94990448Srwatson			 * If the copyout fails, then the process's state
95090448Srwatson			 * is not consistent (the effects of the traced
95190448Srwatson			 * instruction will never be seen). This process
95290448Srwatson			 * cannot be allowed to continue execution.
95390448Srwatson			 */
95459241Srwatson			fasttrap_sigtrap(curproc, curthread, pc);
95590448Srwatson			return (0);
95690448Srwatson		}
95790448Srwatson
95890448Srwatson		curthread->t_dtrace_pc = pc;
95959241Srwatson		curthread->t_dtrace_npc = npc;
96090448Srwatson		curthread->t_dtrace_on = 1;
96190448Srwatson
96290448Srwatson		pc = curthread->t_dtrace_scrpc;
96359241Srwatson
96459241Srwatson		if (tp->ftt_retids != NULL) {
96590448Srwatson			curthread->t_dtrace_step = 1;
96690448Srwatson			curthread->t_dtrace_ret = 1;
96765768Srwatson			npc = curthread->t_dtrace_astpc;
96865768Srwatson		}
969175294Sattilio		break;
97065768Srwatson	}
97159241Srwatson
97259241Srwatson	default:
97359241Srwatson		panic("fasttrap: mishandled an instruction");
97459241Srwatson	}
975118131Srwatson
976118131Srwatson	/*
977118131Srwatson	 * This bit me in the ass a couple of times, so lets toss this
978118131Srwatson	 * in as a cursory sanity check.
979118131Srwatson	 */
980118131Srwatson	ASSERT(pc != rp->r_g7 + 4);
981118131Srwatson	ASSERT(pc != rp->r_g7 + 8);
982118131Srwatson
983118131Srwatsondone:
984118131Srwatson	/*
985118131Srwatson	 * If there were no return probes when we first found the tracepoint,
986118131Srwatson	 * we should feel no obligation to honor any return probes that were
987118131Srwatson	 * subsequently enabled -- they'll just have to wait until the next
988118131Srwatson	 * time around.
989131066Srwatson	 */
990131066Srwatson	if (tp->ftt_retids != NULL) {
991131066Srwatson		/*
992118131Srwatson		 * We need to wait until the results of the instruction are
993234613Strasz		 * apparent before invoking any return probes. If this
994118131Srwatson		 * instruction was emulated we can just call
995118131Srwatson		 * fasttrap_return_common(); if it needs to be executed, we
996118131Srwatson		 * need to wait until we return to the kernel.
997118131Srwatson		 */
998118131Srwatson		if (tp->ftt_type != FASTTRAP_T_COMMON) {
999234613Strasz			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
1000118131Srwatson		} else {
1001118131Srwatson			ASSERT(curthread->t_dtrace_ret != 0);
1002118131Srwatson			ASSERT(curthread->t_dtrace_pc == orig_pc);
1003118131Srwatson			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1004118131Srwatson			ASSERT(npc == curthread->t_dtrace_astpc);
100570776Srwatson		}
100659241Srwatson	}
100759241Srwatson
100895974Sphk	ASSERT(pc != 0);
100959241Srwatson	rp->r_pc = pc;
101059241Srwatson	rp->r_npc = npc;
101170776Srwatson
101274437Srwatson	return (0);
101370776Srwatson}
101470776Srwatson
101570776Srwatsonint
101683366Sjulianfasttrap_return_probe(struct regs *rp)
101759241Srwatson{
101859241Srwatson	proc_t *p = ttoproc(curthread);
101959241Srwatson	pid_t pid;
1020131066Srwatson	uintptr_t pc = curthread->t_dtrace_pc;
1021131066Srwatson	uintptr_t npc = curthread->t_dtrace_npc;
1022131066Srwatson
102359241Srwatson	curthread->t_dtrace_pc = 0;
1024118131Srwatson	curthread->t_dtrace_npc = 0;
1025118131Srwatson	curthread->t_dtrace_scrpc = 0;
1026118131Srwatson	curthread->t_dtrace_astpc = 0;
1027118131Srwatson
1028118131Srwatson	/*
102959241Srwatson	 * Treat a child created by a call to vfork(2) as if it were its
1030234613Strasz	 * parent. We know there's only one thread of control in such a
1031225104Sae	 * process: this one.
1032118131Srwatson	 */
1033118131Srwatson	while (p->p_flag & SVFORK) {
1034118131Srwatson		p = p->p_parent;
1035234613Strasz	}
103659241Srwatson
103759241Srwatson	/*
103859241Srwatson	 * We set the %pc and %npc to their values when the traced
103959241Srwatson	 * instruction was initially executed so that it appears to
104059241Srwatson	 * dtrace_probe() that we're on the original instruction, and so that
104159241Srwatson	 * the user can't easily detect our complex web of lies.
104259241Srwatson	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
104359241Srwatson	 * after we return.
104459241Srwatson	 */
104574437Srwatson	rp->r_pc = pc;
104683366Sjulian	rp->r_npc = npc;
104759241Srwatson
1048131066Srwatson	pid = p->p_pid;
1049131066Srwatson	fasttrap_return_common(rp, pc, pid, 0);
1050131066Srwatson
1051131066Srwatson	return (0);
1052131066Srwatson}
1053131066Srwatson
1054131066Srwatsonint
1055131066Srwatsonfasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1056131066Srwatson{
105759241Srwatson	fasttrap_instr_t instr = FASTTRAP_INSTR;
105859241Srwatson
105959241Srwatson	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
106059241Srwatson		return (-1);
106159241Srwatson
106285577Srwatson	return (0);
106365377Srwatson}
106459241Srwatson
1065182721Straszint
1066102985Sphkfasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1067102985Sphk{
1068102985Sphk	fasttrap_instr_t instr;
106974437Srwatson
107059241Srwatson	/*
107191814Sgreen	 * Distinguish between read or write failures and a changed
107259241Srwatson	 * instruction.
107359241Srwatson	 */
107470776Srwatson	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
107559241Srwatson		return (0);
107670776Srwatson	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
107759241Srwatson		return (0);
107859241Srwatson	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
107959241Srwatson		return (-1);
108059241Srwatson
108159241Srwatson	return (0);
108259241Srwatson}
108359241Srwatson
108470776Srwatsonint
108559241Srwatsonfasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
108659241Srwatson    fasttrap_probe_type_t type)
108759241Srwatson{
108859241Srwatson	uint32_t instr;
108959241Srwatson	int32_t disp;
109059241Srwatson
109170776Srwatson	/*
109259241Srwatson	 * Read the instruction at the given address out of the process's
109359241Srwatson	 * address space. We don't have to worry about a debugger
109459241Srwatson	 * changing this instruction before we overwrite it with our trap
109559388Srwatson	 * instruction since P_PR_LOCK is set.
109659241Srwatson	 */
109759241Srwatson	if (uread(p, &instr, 4, pc) != 0)
109859241Srwatson		return (-1);
109959241Srwatson
110059241Srwatson	/*
110159241Srwatson	 * Decode the instruction to fill in the probe flags. We can have
110283366Sjulian	 * the process execute most instructions on its own using a pc/npc
110359241Srwatson	 * trick, but pc-relative control transfer present a problem since
110459241Srwatson	 * we're relocating the instruction. We emulate these instructions
110559241Srwatson	 * in the kernel. We assume a default type and over-write that as
110659241Srwatson	 * needed.
110765768Srwatson	 *
1108141523Srwatson	 * pc-relative instructions must be emulated for correctness;
110959241Srwatson	 * other instructions (which represent a large set of commonly traced
111065768Srwatson	 * instructions) are emulated or otherwise optimized for performance.
111159241Srwatson	 */
111259241Srwatson	tp->ftt_type = FASTTRAP_T_COMMON;
1113175202Sattilio	if (OP(instr) == 1) {
111459241Srwatson		/*
111575106Srwatson		 * Call instructions.
111675106Srwatson		 */
111775106Srwatson		tp->ftt_type = FASTTRAP_T_CALL;
111875106Srwatson		disp = DISP30(instr) << 2;
111975106Srwatson		tp->ftt_dest = pc + (intptr_t)disp;
112059241Srwatson
112159241Srwatson	} else if (OP(instr) == 0) {
112259241Srwatson		/*
112359268Srwatson		 * Branch instructions.
112459241Srwatson		 *
112559241Srwatson		 * Unconditional branches need careful attention when they're
112659268Srwatson		 * annulled: annulled unconditional branches never execute
112759241Srwatson		 * the instruction in the delay slot.
112859241Srwatson		 */
112970776Srwatson		switch (OP2(instr)) {
113059241Srwatson		case OP2_ILLTRAP:
113159241Srwatson		case 0x7:
113259241Srwatson			/*
113375106Srwatson			 * The compiler may place an illtrap after a call to
113475106Srwatson			 * a function that returns a structure. In the case of
113575106Srwatson			 * a returned structure, the compiler places an illtrap
113675106Srwatson			 * whose const22 field is the size of the returned
113775106Srwatson			 * structure immediately following the delay slot of
113875106Srwatson			 * the call. To stay out of the way, we refuse to
113959241Srwatson			 * place tracepoints on top of illtrap instructions.
114059241Srwatson			 *
114159241Srwatson			 * This is one of the dumbest architectural decisions
114259268Srwatson			 * I've ever had to work around.
1143175294Sattilio			 *
114459268Srwatson			 * We also identify the only illegal op2 value (See
114559241Srwatson			 * SPARC Architecture Manual Version 9, E.2 table 31).
114659241Srwatson			 */
114759241Srwatson			return (-1);
114859241Srwatson
114959241Srwatson		case OP2_BPcc:
115059241Srwatson			if (COND(instr) == 8) {
115159241Srwatson				tp->ftt_type = FASTTRAP_T_ALWAYS;
115259241Srwatson			} else {
115374437Srwatson				/*
115483366Sjulian				 * Check for an illegal instruction.
115559241Srwatson				 */
1156131066Srwatson				if (CC(instr) & 1)
1157131066Srwatson					return (-1);
1158131066Srwatson				tp->ftt_type = FASTTRAP_T_CCR;
1159131066Srwatson				tp->ftt_cc = CC(instr);
1160131066Srwatson				tp->ftt_code = COND(instr);
1161131066Srwatson			}
1162131066Srwatson
1163131066Srwatson			if (A(instr) != 0)
1164131066Srwatson				tp->ftt_flags |= FASTTRAP_F_ANNUL;
116559241Srwatson
116659241Srwatson			disp = DISP19(instr);
116759241Srwatson			disp <<= 13;
116859241Srwatson			disp >>= 11;
116959241Srwatson			tp->ftt_dest = pc + (intptr_t)disp;
117085577Srwatson			break;
117165377Srwatson
117259241Srwatson		case OP2_Bicc:
1173182721Strasz			if (COND(instr) == 8) {
1174102985Sphk				tp->ftt_type = FASTTRAP_T_ALWAYS;
1175102985Sphk			} else {
1176102985Sphk				tp->ftt_type = FASTTRAP_T_CCR;
117774437Srwatson				tp->ftt_cc = 0;
117859241Srwatson				tp->ftt_code = COND(instr);
117991814Sgreen			}
118059241Srwatson
118159241Srwatson			if (A(instr) != 0)
118259241Srwatson				tp->ftt_flags |= FASTTRAP_F_ANNUL;
118370776Srwatson
118459241Srwatson			disp = DISP22(instr);
118559241Srwatson			disp <<= 10;
118659241Srwatson			disp >>= 8;
118759241Srwatson			tp->ftt_dest = pc + (intptr_t)disp;
118859241Srwatson			break;
118959241Srwatson
119065768Srwatson		case OP2_BPr:
119159241Srwatson			/*
119259241Srwatson			 * Check for an illegal instruction.
119359241Srwatson			 */
119459241Srwatson			if ((RCOND(instr) & 3) == 0)
119559241Srwatson				return (-1);
119659241Srwatson
119759241Srwatson			/*
119859241Srwatson			 * It's a violation of the v8plus ABI to use a
119959241Srwatson			 * register-predicated branch in a 32-bit app if
120083366Sjulian			 * the register used is an %l or an %i (%gs and %os
120159241Srwatson			 * are legit because they're not saved to the stack
120259241Srwatson			 * in 32-bit words when we take a trap).
120359241Srwatson			 */
120459241Srwatson			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
120559241Srwatson				return (-1);
120659241Srwatson
120759241Srwatson			tp->ftt_type = FASTTRAP_T_REG;
120859241Srwatson			if (A(instr) != 0)
1209175202Sattilio				tp->ftt_flags |= FASTTRAP_F_ANNUL;
121059241Srwatson			disp = DISP16(instr);
121165768Srwatson			disp <<= 16;
121265768Srwatson			disp >>= 14;
121359241Srwatson			tp->ftt_dest = pc + (intptr_t)disp;
121459241Srwatson			tp->ftt_code = RCOND(instr);
121559241Srwatson			break;
121670776Srwatson
121759241Srwatson		case OP2_SETHI:
121891814Sgreen			tp->ftt_type = FASTTRAP_T_SETHI;
121959241Srwatson			break;
122059241Srwatson
122159241Srwatson		case OP2_FBPfcc:
122270773Srwatson			if (COND(instr) == 8) {
122370773Srwatson				tp->ftt_type = FASTTRAP_T_ALWAYS;
122470773Srwatson			} else {
122570773Srwatson				tp->ftt_type = FASTTRAP_T_FCC;
122670776Srwatson				tp->ftt_cc = CC(instr);
122770773Srwatson				tp->ftt_code = COND(instr);
122870773Srwatson			}
122970773Srwatson
1230106673Sjhb			if (A(instr) != 0)
1231106673Sjhb				tp->ftt_flags |= FASTTRAP_F_ANNUL;
123291814Sgreen
123370773Srwatson			disp = DISP19(instr);
123470773Srwatson			disp <<= 13;
123570773Srwatson			disp >>= 11;
123670776Srwatson			tp->ftt_dest = pc + (intptr_t)disp;
123759241Srwatson			break;
123870774Srwatson
123959241Srwatson		case OP2_FBfcc:
124070774Srwatson			if (COND(instr) == 8) {
124170774Srwatson				tp->ftt_type = FASTTRAP_T_ALWAYS;
124270774Srwatson			} else {
124370774Srwatson				tp->ftt_type = FASTTRAP_T_FCC;
124470774Srwatson				tp->ftt_cc = 0;
124570774Srwatson				tp->ftt_code = COND(instr);
124683366Sjulian			}
124765768Srwatson
124865768Srwatson			if (A(instr) != 0)
124965768Srwatson				tp->ftt_flags |= FASTTRAP_F_ANNUL;
125075106Srwatson
125175106Srwatson			disp = DISP22(instr);
125275106Srwatson			disp <<= 10;
125375106Srwatson			disp >>= 8;
125475106Srwatson			tp->ftt_dest = pc + (intptr_t)disp;
125559241Srwatson			break;
125659241Srwatson		}
125759241Srwatson
125859241Srwatson	} else if (OP(instr) == 2) {
125959241Srwatson		switch (OP3(instr)) {
126059241Srwatson		case OP3_RETURN:
126159241Srwatson			tp->ftt_type = FASTTRAP_T_RETURN;
1262175294Sattilio			break;
126359241Srwatson
126459241Srwatson		case OP3_JMPL:
126559241Srwatson			tp->ftt_type = FASTTRAP_T_JMPL;
126659241Srwatson			break;
126759241Srwatson
126859241Srwatson		case OP3_RD:
126959241Srwatson			if (RS1(instr) == 5)
127059241Srwatson				tp->ftt_type = FASTTRAP_T_RDPC;
127159241Srwatson			break;
127283366Sjulian
127359241Srwatson		case OP3_SAVE:
1274131066Srwatson			/*
1275131066Srwatson			 * We optimize for save instructions at function
1276131066Srwatson			 * entry; see the comment in fasttrap_pid_probe()
127759241Srwatson			 * (near FASTTRAP_T_SAVE) for details.
127877190Stmm			 */
127977190Stmm			if (fasttrap_optimize_save != 0 &&
128077190Stmm			    type == DTFTP_ENTRY &&
128177190Stmm			    I(instr) == 1 && RD(instr) == R_SP)
128277190Stmm				tp->ftt_type = FASTTRAP_T_SAVE;
128377190Stmm			break;
128477190Stmm
128577190Stmm		case OP3_RESTORE:
1286234613Strasz			/*
128770776Srwatson			 * We optimize restore instructions at function
128859241Srwatson			 * return; see the comment in fasttrap_pid_probe()
1289234613Strasz			 * (near FASTTRAP_T_RESTORE) for details.
129059241Srwatson			 *
129159241Srwatson			 * rd must be an %o or %g register.
129259241Srwatson			 */
129372012Sphk			if ((RD(instr) & 0x10) == 0)
129474437Srwatson				tp->ftt_type = FASTTRAP_T_RESTORE;
129583366Sjulian			break;
129659241Srwatson
1297234613Strasz		case OP3_OR:
129859241Srwatson			/*
129974273Srwatson			 * A large proportion of instructions in the delay
130074433Srwatson			 * slot of retl instructions are or's so we emulate
1301			 * these downstairs as an optimization.
1302			 */
1303			tp->ftt_type = FASTTRAP_T_OR;
1304			break;
1305
1306		case OP3_TCC:
1307			/*
1308			 * Breakpoint instructions are effectively position-
1309			 * dependent since the debugger uses the %pc value
1310			 * to lookup which breakpoint was executed. As a
1311			 * result, we can't actually instrument breakpoints.
1312			 */
1313			if (SW_TRAP(instr) == ST_BREAKPOINT)
1314				return (-1);
1315			break;
1316
1317		case 0x19:
1318		case 0x1d:
1319		case 0x29:
1320		case 0x33:
1321		case 0x3f:
1322			/*
1323			 * Identify illegal instructions (See SPARC
1324			 * Architecture Manual Version 9, E.2 table 32).
1325			 */
1326			return (-1);
1327		}
1328	} else if (OP(instr) == 3) {
1329		uint32_t op3 = OP3(instr);
1330
1331		/*
1332		 * Identify illegal instructions (See SPARC Architecture
1333		 * Manual Version 9, E.2 table 33).
1334		 */
1335		if ((op3 & 0x28) == 0x28) {
1336			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
1337			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
1338				return (-1);
1339		} else {
1340			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
1341				return (-1);
1342		}
1343	}
1344
1345	tp->ftt_instr = instr;
1346
1347	/*
1348	 * We don't know how this tracepoint is going to be used, but in case
1349	 * it's used as part of a function return probe, we need to indicate
1350	 * whether it's always a return site or only potentially a return
1351	 * site. If it's part of a return probe, it's always going to be a
1352	 * return from that function if it's a restore instruction or if
1353	 * the previous instruction was a return. If we could reliably
1354	 * distinguish jump tables from return sites, this wouldn't be
1355	 * necessary.
1356	 */
1357	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
1358	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
1359	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
1360		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
1361
1362	return (0);
1363}
1364
1365/*ARGSUSED*/
1366uint64_t
1367fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1368    int aframes)
1369{
1370	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1371}
1372
1373/*ARGSUSED*/
1374uint64_t
1375fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1376    int aframes)
1377{
1378	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1379}
1380
1381static uint64_t fasttrap_getreg_fast_cnt;
1382static uint64_t fasttrap_getreg_mpcb_cnt;
1383static uint64_t fasttrap_getreg_slow_cnt;
1384
1385static ulong_t
1386fasttrap_getreg(struct regs *rp, uint_t reg)
1387{
1388	ulong_t value;
1389	dtrace_icookie_t cookie;
1390	struct machpcb *mpcb;
1391	extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1392
1393	/*
1394	 * We have the %os and %gs in our struct regs, but if we need to
1395	 * snag a %l or %i we need to go scrounging around in the process's
1396	 * address space.
1397	 */
1398	if (reg == 0)
1399		return (0);
1400
1401	if (reg < 16)
1402		return ((&rp->r_g1)[reg - 1]);
1403
1404	/*
1405	 * Before we look at the user's stack, we'll check the register
1406	 * windows to see if the information we want is in there.
1407	 */
1408	cookie = dtrace_interrupt_disable();
1409	if (dtrace_getotherwin() > 0) {
1410		value = dtrace_getreg_win(reg, 1);
1411		dtrace_interrupt_enable(cookie);
1412
1413		atomic_add_64(&fasttrap_getreg_fast_cnt, 1);
1414
1415		return (value);
1416	}
1417	dtrace_interrupt_enable(cookie);
1418
1419	/*
1420	 * First check the machpcb structure to see if we've already read
1421	 * in the register window we're looking for; if we haven't, (and
1422	 * we probably haven't) try to copy in the value of the register.
1423	 */
1424	/* LINTED - alignment */
1425	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1426
1427	if (get_udatamodel() == DATAMODEL_NATIVE) {
1428		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1429
1430		if (mpcb->mpcb_wbcnt > 0) {
1431			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1432			int i = mpcb->mpcb_wbcnt;
1433			do {
1434				i--;
1435				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1436					continue;
1437
1438				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
1439				return (rwin[i].rw_local[reg - 16]);
1440			} while (i > 0);
1441		}
1442
1443		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
1444			goto err;
1445	} else {
1446		struct frame32 *fr =
1447		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1448		uint32_t *v32 = (uint32_t *)&value;
1449
1450		if (mpcb->mpcb_wbcnt > 0) {
1451			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1452			int i = mpcb->mpcb_wbcnt;
1453			do {
1454				i--;
1455				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1456					continue;
1457
1458				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
1459				return (rwin[i].rw_local[reg - 16]);
1460			} while (i > 0);
1461		}
1462
1463		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1464			goto err;
1465
1466		v32[0] = 0;
1467	}
1468
1469	atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
1470	return (value);
1471
1472err:
1473	/*
1474	 * If the copy in failed, the process will be in a irrecoverable
1475	 * state, and we have no choice but to kill it.
1476	 */
1477	kern_psignal(ttoproc(curthread), SIGILL);
1478	return (0);
1479}
1480
1481static uint64_t fasttrap_putreg_fast_cnt;
1482static uint64_t fasttrap_putreg_mpcb_cnt;
1483static uint64_t fasttrap_putreg_slow_cnt;
1484
1485static void
1486fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
1487{
1488	dtrace_icookie_t cookie;
1489	struct machpcb *mpcb;
1490	extern void dtrace_putreg_win(uint_t, ulong_t);
1491
1492	if (reg == 0)
1493		return;
1494
1495	if (reg < 16) {
1496		(&rp->r_g1)[reg - 1] = value;
1497		return;
1498	}
1499
1500	/*
1501	 * If the user process is still using some register windows, we
1502	 * can just place the value in the correct window.
1503	 */
1504	cookie = dtrace_interrupt_disable();
1505	if (dtrace_getotherwin() > 0) {
1506		dtrace_putreg_win(reg, value);
1507		dtrace_interrupt_enable(cookie);
1508		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
1509		return;
1510	}
1511	dtrace_interrupt_enable(cookie);
1512
1513	/*
1514	 * First see if there's a copy of the register window in the
1515	 * machpcb structure that we can modify; if there isn't try to
1516	 * copy out the value. If that fails, we try to create a new
1517	 * register window in the machpcb structure. While this isn't
1518	 * _precisely_ the intended use of the machpcb structure, it
1519	 * can't cause any problems since we know at this point in the
1520	 * code that all of the user's data have been flushed out of the
1521	 * register file (since %otherwin is 0).
1522	 */
1523	/* LINTED - alignment */
1524	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1525
1526	if (get_udatamodel() == DATAMODEL_NATIVE) {
1527		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1528		/* LINTED - alignment */
1529		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
1530
1531		if (mpcb->mpcb_wbcnt > 0) {
1532			int i = mpcb->mpcb_wbcnt;
1533			do {
1534				i--;
1535				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1536					continue;
1537
1538				rwin[i].rw_local[reg - 16] = value;
1539				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1540				return;
1541			} while (i > 0);
1542		}
1543
1544		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
1545			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1546			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1547				goto err;
1548
1549			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
1550			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1551			mpcb->mpcb_wbcnt++;
1552			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1553			return;
1554		}
1555	} else {
1556		struct frame32 *fr =
1557		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1558		/* LINTED - alignment */
1559		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
1560		uint32_t v32 = (uint32_t)value;
1561
1562		if (mpcb->mpcb_wbcnt > 0) {
1563			int i = mpcb->mpcb_wbcnt;
1564			do {
1565				i--;
1566				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1567					continue;
1568
1569				rwin[i].rw_local[reg - 16] = v32;
1570				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1571				return;
1572			} while (i > 0);
1573		}
1574
1575		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
1576			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1577			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1578				goto err;
1579
1580			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
1581			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1582			mpcb->mpcb_wbcnt++;
1583			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1584			return;
1585		}
1586	}
1587
1588	atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
1589	return;
1590
1591err:
1592	/*
1593	 * If we couldn't record this register's value, the process is in an
1594	 * irrecoverable state and we have no choice but to euthanize it.
1595	 */
1596	kern_psignal(ttoproc(curthread), SIGILL);
1597}
1598