/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/cmn_err.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/trap.h>

#include <v9/sys/machpcb.h>
#include <v9/sys/privregs.h>
/*
 * Lossless User-Land Tracing on SPARC
 * -----------------------------------
 *
 * The Basic Idea
 *
 * The most important design constraint is, of course, correct execution of
 * the user thread above all else. The next most important goal is rapid
 * execution. We combine execution of instructions in user-land with
 * emulation of certain instructions in the kernel to aim for complete
 * correctness and maximal performance.
 *
 * We take advantage of the split PC/NPC architecture to speed up logical
 * single-stepping; when we copy an instruction out to the scratch space in
 * the ulwp_t structure (held in the %g7 register on SPARC), we can
 * effectively single step by setting the PC to our scratch space and leaving
 * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
 * branches, call and link instructions (call and jmpl), and the rdpc
 * instruction. These instructions cannot be executed in the manner described
 * so they must be emulated in the kernel.
 *
 * Emulation for this small set of instructions is fairly simple; the most
 * difficult part is emulating branch conditions.
 *
 *
 * A Cache Heavy Portfolio
 *
 * It's important to note at this time that copying an instruction out to the
 * ulwp_t scratch space in user-land is rather complicated. SPARC has
 * separate data and instruction caches so any writes to the D$ (using a
 * store instruction for example) aren't necessarily reflected in the I$.
 * The flush instruction can be used to synchronize the two and must be used
 * for any self-modifying code, but the flush instruction only applies to the
 * primary address space (the absence of a flusha analogue to the flush
 * instruction, one that accepts an ASI argument, is an obvious omission
 * from SPARC v9, the revision that introduced the notion of alternate
 * address spaces). To correctly copy out the instruction we must use a
 * block store that doesn't allocate in the D$ and ensures synchronization
 * with the I$; see dtrace_blksuword32() for the implementation (this
 * function uses ASI_BLK_COMMIT_S to write a block through the secondary
 * ASI in the manner described). Refer to the UltraSPARC I/II manual for
 * details on the ASI_BLK_COMMIT_S ASI.
 *
 *
 * Return Subtleties
 *
 * When we're firing a return probe we need to expose the value returned by
 * the function being traced. Since the function can set the return value
 * in its last instruction, we need to fire the return probe only _after_
 * the effects of the instruction are apparent. For instructions that we
 * emulate, we can call dtrace_probe() after we've performed the emulation;
 * for instructions that we execute after we return to user-land, we set
 * %pc to the instruction we copied out (as described above) and set %npc
 * to a trap instruction stashed in the ulwp_t structure. After the traced
 * instruction is executed, the trap instruction returns control to the
 * kernel where we can fire the return probe.
 *
 * This need for a second trap in cases where we execute the traced
 * instruction makes it all the more important to emulate the most common
 * instructions to avoid the second trip in and out of the kernel.
 *
 *
 * Making it Fast
 *
 * Since copying out an instruction is neither simple nor inexpensive for the
 * CPU, we should attempt to avoid doing it in as many cases as possible.
 * Since function entry and return are usually the most interesting probe
 * sites, we attempt to tune the performance of the fasttrap provider around
 * instructions typically in those places.
 *
 * Looking at a bunch of functions in libraries and executables reveals that
 * most functions begin with either a save or a sethi (to set up a larger
 * argument to the save) and end with a restore or an or (in the case of leaf
 * functions). To try to improve performance, we emulate all of these
 * instructions in the kernel.
 *
 * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 * register windows from the kernel, we emulate the implicit add that takes
 * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a return
 * probe and want to make it seem as though the tracepoint has been completely
 * executed, we need to remember that we've pulled this trick with restore and
 * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why, in the case of emulating a restore, we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
 */

#define	OP(x)		((x) >> 30)
#define	OP2(x)		(((x) >> 22) & 0x07)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RCOND(x)	(((x) >> 25) & 0x07)
#define	COND(x)		(((x) >> 25) & 0x0f)
#define	A(x)		(((x) >> 29) & 0x01)
#define	I(x)		(((x) >> 13) & 0x01)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	RS2(x)		(((x) >> 0) & 0x1f)
#define	CC(x)		(((x) >> 20) & 0x03)
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)
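
/*
 * As a worked example of the field macros above, consider the common
 * function prologue "save %sp, -0x60, %sp", which encodes to 0x9de3bfa0:
 * OP() is 2 (an arithmetic-format instruction), OP3() is 0x3c (OP3_SAVE),
 * RD() and RS1() are both 14 (R_SP), I() is 1 (immediate form), and the
 * low 13 bits hold the sign-extended immediate -0x60. Note that DISP16()
 * reassembles the two halves of the BPr displacement (d16hi in bits 21:20,
 * d16lo in bits 13:0) into a contiguous 16-bit value.
 */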

#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27
#define	R_I4		28

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */

/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 */
int fasttrap_optimize_save = 1;

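/*
 * Fetch probe argument argno per the SPARC calling convention: the first
 * six arguments live in %o0-%o5 (available directly in the saved regs),
 * and any further arguments are read from the caller's argument dump
 * area on the stack (biased by STACK_BIAS for 64-bit processes).
 */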
static uint64_t
fasttrap_anarg(struct regs *rp, int argno)
{
	uint64_t value;

	if (argno < 6)
		return ((&rp->r_o0)[argno]);

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	}

	return (value);
}

static ulong_t fasttrap_getreg(struct regs *, uint_t);
static void fasttrap_putreg(struct regs *, uint_t, ulong_t);

static void
fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
    uint_t fake_restore, int argc, uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	int inc = (fake_restore ? 16 : 0);

	/*
	 * The only way we'll hit the fake_restore case is if a USDT probe is
	 * invoked as a tail-call. While calling fasttrap_getreg() wouldn't
	 * be incorrect, we can avoid it and safely use rp->r_sp directly
	 * since a tail-call can't be made if the invoked function would use
	 * the argument dump space (i.e. if there were more than 6 arguments).
	 * We take this shortcut because unconditionally rooting around for
	 * R_FP (R_SP + 16) would be unnecessarily painful.
	 */

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		uintptr_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}

	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;
		uint32_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}

static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;
	dtrace_icookie_t cookie;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL || tp->ftt_retids == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;

		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
			if (probe->ftp_argmap != NULL && fake_restore) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else if (probe->ftp_argmap != NULL) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);

			} else if (fake_restore) {
				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, arg0, arg1,
				    arg2, arg3, arg4);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else {
				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
				    rp->r_o2, rp->r_o3, rp->r_o4);
			}

			continue;
		}

		/*
		 * If this is only a possible return point, we must
		 * be looking at a potential tail call in leaf context.
		 * If the %npc is still within this function, then we
		 * must have misidentified a jmpl as a tail-call when it
		 * is, in fact, part of a jump table. It would be nice to
		 * remove this tracepoint, but this is neither the time
		 * nor the place.
		 */
		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * It's possible for a function to branch to the delay slot
		 * of an instruction that we've identified as a return site.
		 * We can detect this spurious return probe activation by
		 * observing that in this case %npc will be %pc + 4 and %npc
		 * will be inside the current function (unless the user is
		 * doing _crazy_ instruction picking in which case there's
		 * very little we can do). The second check is important
		 * in case the last instructions of a function make a
		 * tail-call to the function located immediately after it.
		 */
		if (rp->r_npc == rp->r_pc + 4 &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * The first argument is the offset of the return tracepoint
		 * in the function; the remaining arguments are the return
		 * values.
		 *
		 * If fake_restore is set, we need to pull the return values
		 * out of the %i's rather than the %o's -- a little trickier.
		 */
		if (!fake_restore) {
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
		} else {
			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    arg0, arg1, arg2, arg3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
			dtrace_interrupt_enable(cookie);
		}
	}

	mutex_exit(pid_mtx);
}

int
fasttrap_pid_probe(struct regs *rp)
{
	proc_t *p = curproc;
	fasttrap_tracepoint_t *tp, tp_local;
	fasttrap_id_t *id;
	pid_t pid;
	uintptr_t pc = rp->r_pc;
	uintptr_t npc = rp->r_npc;
	uintptr_t orig_pc = pc;
	fasttrap_bucket_t *bucket;
	kmutex_t *pid_mtx;
	uint_t fake_restore = 0, is_enabled = 0;
	dtrace_icookie_t cookie;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return (-1);
	}

	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		int isentry = (id->fti_ptype == DTFTP_ENTRY);

		if (id->fti_ptype == DTFTP_IS_ENABLED) {
			is_enabled = 1;
			continue;
		}

		/*
		 * We note that this was an entry probe to help ustack() find
		 * the first caller.
		 */
		if (isentry) {
			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
		}
		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
		    rp->r_o3, rp->r_o4);
		if (isentry) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
			dtrace_interrupt_enable(cookie);
		}
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	mutex_exit(pid_mtx);
	tp = &tp_local;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'mov %g0, %o0' instruction that was placed
	 * there by DTrace when the binary was linked. As this probe is, in
	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
	 * bypass all the instruction emulation logic since we know the
	 * inevitable result. It's possible that a user could construct a
	 * scenario where the 'is-enabled' probe was on some other
	 * instruction, but that would be a rather exotic way to shoot oneself
	 * in the foot.
	 */
	if (is_enabled) {
		rp->r_o0 = 1;
		pc = rp->r_npc;
		npc = pc + 4;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position dependent instructions) or to optimize
	 * common cases. The rest we have the thread execute back in
	 * user-land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_SAVE:
	{
		int32_t imm;

		/*
		 * This is an optimization to let us handle function entry
		 * probes more efficiently. Many functions begin with a save
		 * instruction that follows the pattern:
		 *	save	%sp, <imm>, %sp
		 *
		 * Meanwhile, we've stashed the instruction:
		 *	save	%g1, %g0, %sp
		 *
		 * off of %g7, so all we have to do is stick the right value
		 * into %g1 and reset %pc to point to the instruction we've
		 * cleverly hidden (%npc should not be touched).
		 */

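		/*
		 * Shifting left then right by 19 sign-extends the 13-bit
		 * immediate field of the save instruction into a full
		 * 32-bit value.
		 */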
		imm = tp->ftt_instr << 19;
		imm >>= 19;
		rp->r_g1 = rp->r_sp + imm;
		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
		break;
	}

	case FASTTRAP_T_RESTORE:
	{
		ulong_t value;
		uint_t rd;

		/*
		 * This is an optimization to let us handle function
		 * return probes more efficiently. Most non-leaf functions
		 * end with the sequence:
		 *	ret
		 *	restore	<reg>, <reg_or_imm>, %oX
		 *
		 * We've stashed the instruction:
		 *	restore	%g0, %g0, %g0
		 *
		 * off of %g7 so we just need to place the correct value
		 * in the right %i register (since after our fake-o
		 * restore, the %i's will become the %o's) and set the %pc
		 * to point to our hidden restore. We also set fake_restore to
		 * let fasttrap_return_common() know that it will find the
		 * return values in the %i's rather than the %o's.
		 */

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		/*
		 * Convert %o's to %i's; leave %g's as they are. Register
		 * numbers 8 through 15 are the %o's, so adding 16 yields
		 * the corresponding %i.
		 */
		rd = RD(tp->ftt_instr);
		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);

		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_RETURN:
	{
		uintptr_t target;

		/*
		 * A return instruction is like a jmpl (without the link
		 * part) that executes an implicit restore. We've stashed
		 * the instruction:
		 *	return %o0
		 *
		 * off of %g7 so we just need to place the target in %o0
		 * and set the %pc to point to the stashed return instruction.
		 * We use %o0 since that register disappears after the return
		 * executes, erasing any evidence of this tampering.
		 */
		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, R_O0, target);

		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_OR:
	{
		ulong_t value;

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
		pc = rp->r_npc;
		npc = pc + 4;
		break;
	}

	case FASTTRAP_T_SETHI:
		if (RD(tp->ftt_instr) != R_G0) {
			uint32_t imm32 = tp->ftt_instr << 10;
			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
		}
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CCR:
	{
		uint_t c, v, z, n, taken;
		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;

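		/*
		 * The CCR holds the icc flags in bits 3:0 and the xcc
		 * flags in bits 7:4; each nibble is ordered n, z, v, c
		 * from most to least significant bit. For a branch on
		 * %xcc we shift the xcc nibble down into place.
		 */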
		if (tp->ftt_cc != 0)
			ccr >>= 4;

		c = (ccr >> 0) & 1;
		v = (ccr >> 1) & 1;
		z = (ccr >> 2) & 1;
		n = (ccr >> 3) & 1;

		switch (tp->ftt_code) {
		case 0x0:	/* BN */
			taken = 0;		break;
		case 0x1:	/* BE */
			taken = z;		break;
		case 0x2:	/* BLE */
			taken = z | (n ^ v);	break;
		case 0x3:	/* BL */
			taken = n ^ v;		break;
		case 0x4:	/* BLEU */
			taken = c | z;		break;
		case 0x5:	/* BCS (BLU) */
			taken = c;		break;
		case 0x6:	/* BNEG */
			taken = n;		break;
		case 0x7:	/* BVS */
			taken = v;		break;
		case 0x8:	/* BA */
			/*
			 * We handle the BA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = 1;		break;
		case 0x9:	/* BNE */
			taken = ~z;		break;
		case 0xa:	/* BG */
			taken = ~(z | (n ^ v));	break;
		case 0xb:	/* BGE */
			taken = ~(n ^ v);	break;
		case 0xc:	/* BGU */
			taken = ~(c | z);	break;
		case 0xd:	/* BCC (BGEU) */
			taken = ~c;		break;
		case 0xe:	/* BPOS */
			taken = ~n;		break;
		case 0xf:	/* BVC */
			taken = ~v;		break;
		}

		if (taken & 1) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_FCC:
	{
		uint_t fcc;
		uint_t taken;
		uint64_t fsr;

		dtrace_getfsr(&fsr);

		if (tp->ftt_cc == 0) {
			fcc = (fsr >> 10) & 0x3;
		} else {
			uint_t shift;
			ASSERT(tp->ftt_cc <= 3);
			shift = 30 + tp->ftt_cc * 2;
			fcc = (fsr >> shift) & 0x3;
		}

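		/*
		 * fcc0 lives in FSR bits 11:10; fcc1 through fcc3 live in
		 * bits 33:32, 35:34, and 37:36, hence the shift of
		 * 30 + 2 * cc above. The fcc value encodes the comparison
		 * result: 0 = equal, 1 = less, 2 = greater, 3 = unordered,
		 * so each mask below has one bit (1 << fcc) per condition
		 * under which the branch is taken.
		 */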
		switch (tp->ftt_code) {
		case 0x0:	/* FBN */
			taken = (1 << fcc) & (0|0|0|0);	break;
		case 0x1:	/* FBNE */
			taken = (1 << fcc) & (8|4|2|0);	break;
		case 0x2:	/* FBLG */
			taken = (1 << fcc) & (0|4|2|0);	break;
		case 0x3:	/* FBUL */
			taken = (1 << fcc) & (8|0|2|0);	break;
		case 0x4:	/* FBL */
			taken = (1 << fcc) & (0|0|2|0);	break;
		case 0x5:	/* FBUG */
			taken = (1 << fcc) & (8|4|0|0);	break;
		case 0x6:	/* FBG */
			taken = (1 << fcc) & (0|4|0|0);	break;
		case 0x7:	/* FBU */
			taken = (1 << fcc) & (8|0|0|0);	break;
		case 0x8:	/* FBA */
			/*
			 * We handle the FBA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = (1 << fcc) & (8|4|2|1);	break;
		case 0x9:	/* FBE */
			taken = (1 << fcc) & (0|0|0|1);	break;
		case 0xa:	/* FBUE */
			taken = (1 << fcc) & (8|0|0|1);	break;
		case 0xb:	/* FBGE */
			taken = (1 << fcc) & (0|4|0|1);	break;
		case 0xc:	/* FBUGE */
			taken = (1 << fcc) & (8|4|0|1);	break;
		case 0xd:	/* FBLE */
			taken = (1 << fcc) & (0|0|2|1);	break;
		case 0xe:	/* FBULE */
			taken = (1 << fcc) & (8|0|2|1);	break;
		case 0xf:	/* FBO */
			taken = (1 << fcc) & (0|4|2|1);	break;
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_REG:
	{
		int64_t value;
		uint_t taken;
		uint_t reg = RS1(tp->ftt_instr);

		/*
		 * An ILP32 process shouldn't be using a branch predicated on
		 * an %i or an %l since that would violate the ABI; we can't
		 * ensure deterministic behavior for those registers. We
		 * should have identified this case when we enabled the probe.
		 */
		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);

		value = (int64_t)fasttrap_getreg(rp, reg);

		switch (tp->ftt_code) {
		case 0x1:	/* BRZ */
			taken = (value == 0);	break;
		case 0x2:	/* BRLEZ */
			taken = (value <= 0);	break;
		case 0x3:	/* BRLZ */
			taken = (value < 0);	break;
		case 0x5:	/* BRNZ */
			taken = (value != 0);	break;
		case 0x6:	/* BRGZ */
			taken = (value > 0);	break;
		case 0x7:	/* BRGEZ */
			taken = (value >= 0);	break;
		default:
		case 0x0:
		case 0x4:
			panic("fasttrap: mishandled a branch");
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_ALWAYS:
		/*
		 * BAs, BA,As...
		 */

		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Annulled branch always instructions never execute
			 * the instruction in the delay slot.
			 */
			pc = tp->ftt_dest;
			npc = tp->ftt_dest + 4;
		} else {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		}
		break;

	case FASTTRAP_T_RDPC:
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CALL:
		/*
		 * It's a call _and_ link, remember...
		 */
		rp->r_o7 = rp->r_pc;
		pc = rp->r_npc;
		npc = tp->ftt_dest;
		break;

	case FASTTRAP_T_JMPL:
		pc = rp->r_npc;

		if (I(tp->ftt_instr)) {
			uint_t rs1 = RS1(tp->ftt_instr);
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			npc = fasttrap_getreg(rp, rs1) + imm;
		} else {
			uint_t rs1 = RS1(tp->ftt_instr);
			uint_t rs2 = RS2(tp->ftt_instr);

			npc = fasttrap_getreg(rp, rs1) +
			    fasttrap_getreg(rp, rs2);
		}

		/*
		 * Do the link part of the jump-and-link instruction.
		 */
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);

		break;

	case FASTTRAP_T_COMMON:
	{
		curthread->t_dtrace_scrpc = rp->r_g7;
		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
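
		/*
		 * scrpc points at the scratch slot where we place the copy
		 * of the traced instruction; astpc points at the ftret trap
		 * used to reenter the kernel when return probes require a
		 * second trap (see the block comment at the top of this
		 * file).
		 */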

		/*
		 * Copy the instruction to a reserved location in the
		 * user-land thread structure, then set the PC to that
		 * location and leave the NPC alone. We take pains to ensure
		 * consistency in the instruction stream (See SPARC
		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1
		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
		 * instruction into the user's address space without
		 * bypassing the I$. There's no AS_USER version of this ASI
		 * (as exists for other ASIs) so we use the lofault
		 * mechanism to catch faults.
		 */
		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
			/*
			 * If the copyout fails, then the process's state
			 * is not consistent (the effects of the traced
			 * instruction will never be seen). This process
			 * cannot be allowed to continue execution.
			 */
			fasttrap_sigtrap(curproc, curthread, pc);
			return (0);
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = npc;
		curthread->t_dtrace_on = 1;

		pc = curthread->t_dtrace_scrpc;

		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			npc = curthread->t_dtrace_astpc;
		}
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

	/*
	 * This bit me in the ass a couple of times, so let's toss this
	 * in as a cursory sanity check.
	 */
	ASSERT(pc != rp->r_g7 + 4);
	ASSERT(pc != rp->r_g7 + 8);

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until we return to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == orig_pc);
			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
			ASSERT(npc == curthread->t_dtrace_astpc);
		}
	}

	ASSERT(pc != 0);
	rp->r_pc = pc;
	rp->r_npc = npc;

	return (0);
}

int
fasttrap_return_probe(struct regs *rp)
{
	proc_t *p = ttoproc(curthread);
	pid_t pid;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	/*
	 * We set the %pc and %npc to their values when the traced
	 * instruction was initially executed so that it appears to
	 * dtrace_probe() that we're on the original instruction, and so that
	 * the user can't easily detect our complex web of lies.
	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
	 * after we return.
	 */
	rp->r_pc = pc;
	rp->r_npc = npc;

	pid = p->p_pid;
	fasttrap_return_common(rp, pc, pid, 0);

	return (0);
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr = FASTTRAP_INSTR;

	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr;

	/*
	 * Distinguish between read or write failures and a changed
	 * instruction.
	 */
	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint32_t instr;
	int32_t disp;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set.
	 */
	if (uread(p, &instr, 4, pc) != 0)
		return (-1);

	/*
	 * Decode the instruction to fill in the probe flags. We can have
	 * the process execute most instructions on its own using a pc/npc
	 * trick, but pc-relative control transfers present a problem since
	 * we're relocating the instruction. We emulate these instructions
	 * in the kernel. We assume a default type and overwrite that as
	 * needed.
	 *
	 * pc-relative instructions must be emulated for correctness;
	 * other instructions (which represent a large set of commonly traced
	 * instructions) are emulated or otherwise optimized for performance.
	 */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (OP(instr) == 1) {
		/*
		 * Call instructions.
		 */
		tp->ftt_type = FASTTRAP_T_CALL;
		disp = DISP30(instr) << 2;
		tp->ftt_dest = pc + (intptr_t)disp;

	} else if (OP(instr) == 0) {
		/*
		 * Branch instructions.
		 *
		 * Unconditional branches need careful attention when they're
		 * annulled: annulled unconditional branches never execute
		 * the instruction in the delay slot.
		 */
		switch (OP2(instr)) {
		case OP2_ILLTRAP:
		case 0x7:
			/*
			 * The compiler may place an illtrap after a call to
			 * a function that returns a structure. In the case of
			 * a returned structure, the compiler places an illtrap
			 * whose const22 field is the size of the returned
			 * structure immediately following the delay slot of
			 * the call. To stay out of the way, we refuse to
			 * place tracepoints on top of illtrap instructions.
			 *
			 * This is one of the dumbest architectural decisions
			 * I've ever had to work around.
			 *
			 * We also identify the only illegal op2 value (See
			 * SPARC Architecture Manual Version 9, E.2 table 31).
			 */
			return (-1);

		case OP2_BPcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/*
				 * Check for an illegal instruction.
				 */
				if (CC(instr) & 1)
					return (-1);
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

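			/*
			 * Shifting the 19-bit displacement left by 13 and
			 * arithmetically right by 11 sign-extends it and
			 * converts it from a word offset to a byte offset
			 * (a net shift left of 2). The DISP22 and DISP16
			 * cases below use the same trick with shift counts
			 * adjusted for their field widths.
			 */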
			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_Bicc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_BPr:
			/*
			 * Check for an illegal instruction.
			 */
			if ((RCOND(instr) & 3) == 0)
				return (-1);

			/*
			 * It's a violation of the v8plus ABI to use a
			 * register-predicated branch in a 32-bit app if
			 * the register used is an %l or an %i (%gs and %os
			 * are legit because they're not saved to the stack
			 * in 32-bit words when we take a trap).
			 */
			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
				return (-1);

			tp->ftt_type = FASTTRAP_T_REG;
			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;
			disp = DISP16(instr);
			disp <<= 16;
			disp >>= 14;
			tp->ftt_dest = pc + (intptr_t)disp;
			tp->ftt_code = RCOND(instr);
			break;

		case OP2_SETHI:
			tp->ftt_type = FASTTRAP_T_SETHI;
			break;

		case OP2_FBPfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_FBfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
		}

	} else if (OP(instr) == 2) {
		switch (OP3(instr)) {
		case OP3_RETURN:
			tp->ftt_type = FASTTRAP_T_RETURN;
			break;

		case OP3_JMPL:
			tp->ftt_type = FASTTRAP_T_JMPL;
			break;

		case OP3_RD:
			if (RS1(instr) == 5)
				tp->ftt_type = FASTTRAP_T_RDPC;
			break;

		case OP3_SAVE:
			/*
			 * We optimize for save instructions at function
			 * entry; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_SAVE) for details.
			 */
			if (fasttrap_optimize_save != 0 &&
			    type == DTFTP_ENTRY &&
			    I(instr) == 1 && RD(instr) == R_SP)
				tp->ftt_type = FASTTRAP_T_SAVE;
			break;

		case OP3_RESTORE:
			/*
			 * We optimize restore instructions at function
			 * return; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_RESTORE) for details.
			 *
			 * rd must be an %o or %g register.
			 */
			if ((RD(instr) & 0x10) == 0)
				tp->ftt_type = FASTTRAP_T_RESTORE;
			break;

		case OP3_OR:
			/*
			 * A large proportion of instructions in the delay
			 * slot of retl instructions are or's so we emulate
			 * these downstairs as an optimization.
			 */
			tp->ftt_type = FASTTRAP_T_OR;
			break;

		case OP3_TCC:
			/*
			 * Breakpoint instructions are effectively position-
			 * dependent since the debugger uses the %pc value
			 * to lookup which breakpoint was executed. As a
			 * result, we can't actually instrument breakpoints.
			 */
			if (SW_TRAP(instr) == ST_BREAKPOINT)
				return (-1);
			break;

		case 0x19:
		case 0x1d:
		case 0x29:
		case 0x33:
		case 0x3f:
			/*
			 * Identify illegal instructions (See SPARC
			 * Architecture Manual Version 9, E.2 table 32).
			 */
			return (-1);
		}
	} else if (OP(instr) == 3) {
		uint32_t op3 = OP3(instr);

		/*
		 * Identify illegal instructions (See SPARC Architecture
		 * Manual Version 9, E.2 table 33).
		 */
		if ((op3 & 0x28) == 0x28) {
			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
				return (-1);
		} else {
			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
				return (-1);
		}
	}

	tp->ftt_instr = instr;

	/*
	 * We don't know how this tracepoint is going to be used, but in case
	 * it's used as part of a function return probe, we need to indicate
	 * whether it's always a return site or only potentially a return
	 * site. If it's part of a return probe, it's always going to be a
	 * return from that function if it's a restore instruction or if
	 * the previous instruction was a return. If we could reliably
	 * distinguish jump tables from return sites, this wouldn't be
	 * necessary.
	 */
	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;

	return (0);
}

/*ARGSUSED*/
uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

static uint64_t fasttrap_getreg_fast_cnt;
static uint64_t fasttrap_getreg_mpcb_cnt;
static uint64_t fasttrap_getreg_slow_cnt;

static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
	ulong_t value;
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern ulong_t dtrace_getreg_win(uint_t, uint_t);

	/*
	 * We have the %os and %gs in our struct regs, but if we need to
	 * snag a %l or %i we need to go scrounging around in the process's
	 * address space.
	 */
	if (reg == 0)
		return (0);

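	/*
	 * Registers 1 through 15 (%g1-%g7 and %o0-%o7) are laid out
	 * contiguously in struct regs starting at r_g1, so we can index
	 * off of that member directly.
	 */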
	if (reg < 16)
		return ((&rp->r_g1)[reg - 1]);

	/*
	 * Before we look at the user's stack, we'll check the register
	 * windows to see if the information we want is in there.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		value = dtrace_getreg_win(reg, 1);
		dtrace_interrupt_enable(cookie);

		atomic_add_64(&fasttrap_getreg_fast_cnt, 1);

		return (value);
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First check the machpcb structure to see if we've already read
	 * in the register window we're looking for; if we haven't (and
	 * we probably haven't), try to copy in the value of the register.
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
			goto err;
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		uint32_t *v32 = (uint32_t *)&value;

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
			goto err;

		v32[0] = 0;
	}

	atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
	return (value);

err:
	/*
	 * If the copy in failed, the process will be in an irrecoverable
	 * state, and we have no choice but to kill it.
	 */
	psignal(ttoproc(curthread), SIGILL);
	return (0);
}

static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;

static void
fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
{
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern void dtrace_putreg_win(uint_t, ulong_t);

	if (reg == 0)
		return;

	if (reg < 16) {
		(&rp->r_g1)[reg - 1] = value;
		return;
	}

	/*
	 * If the user process is still using some register windows, we
	 * can just place the value in the correct window.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		dtrace_putreg_win(reg, value);
		dtrace_interrupt_enable(cookie);
		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
		return;
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First see if there's a copy of the register window in the
	 * machpcb structure that we can modify; if there isn't, try to
	 * copy out the value. If that fails, we try to create a new
	 * register window in the machpcb structure. While this isn't
	 * _precisely_ the intended use of the machpcb structure, it
	 * can't cause any problems since we know at this point in the
	 * code that all of the user's data have been flushed out of the
	 * register file (since %otherwin is 0).
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		/* LINTED - alignment */
		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = value;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		/* LINTED - alignment */
		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
		uint32_t v32 = (uint32_t)value;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = v32;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	}

	atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
	return;

err:
	/*
	 * If we couldn't record this register's value, the process is in an
	 * irrecoverable state and we have no choice but to euthanize it.
	 */
	psignal(ttoproc(curthread), SIGILL);
}