1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * #pragma ident	"@(#)dtrace_subr.c	1.16	07/09/18 SMI"
29 */
30
31#include <sys/dtrace.h>
32#include <sys/dtrace_glue.h>
33#include <sys/dtrace_impl.h>
34#include <sys/fasttrap.h>
35#include <sys/vm.h>
36#include <sys/user.h>
37#include <sys/kauth.h>
38#include <kern/debug.h>
39
/*
 * Hooks called from dtrace_user_probe() with the trapping thread's saved
 * state. Both are checked against NULL before use and are invoked while
 * the current CPU's cpu_ft_lock is held shared.
 *
 *   dtrace_pid_probe_ptr    - tried first on a breakpoint (T_INT3) trap;
 *                             a return value of 0 means the trap was
 *                             consumed.
 *   dtrace_return_probe_ptr - called on a T_DTRACE_RET trap; its return
 *                             value is ignored.
 *
 * NOTE(review): presumably installed and cleared by the fasttrap
 * provider -- confirm against its attach/detach code.
 */
int (*dtrace_pid_probe_ptr)(x86_saved_state_t *);
int (*dtrace_return_probe_ptr)(x86_saved_state_t *);

/*
 * HACK! There doesn't seem to be an easy way to include trap.h from
 * here, so the two trap numbers this file needs are defined locally.
 * NOTE(review): presumably these must match the values in trap.h --
 * confirm. FIXME!
 */
#define	T_INT3			3		/* int 3 instruction */
#define T_DTRACE_RET		0x7f		/* DTrace pid return */

/* Prototype for the trap-time entry point defined below. */
kern_return_t
dtrace_user_probe(x86_saved_state_t *);
52
53kern_return_t
54dtrace_user_probe(x86_saved_state_t *regs)
55{
56	x86_saved_state64_t *regs64;
57	x86_saved_state32_t *regs32;
58        int trapno;
59
60	/*
61	 * FIXME!
62	 *
63	 * The only call path into this method is always a user trap.
64	 * We don't need to test for user trap, but should assert it.
65	 */
66	boolean_t user_mode = TRUE;
67
68        if (is_saved_state64(regs) == TRUE) {
69                regs64 = saved_state64(regs);
70		regs32 = NULL;
71                trapno = regs64->isf.trapno;
72                user_mode = TRUE; // By default, because xnu is 32 bit only
73        } else {
74		regs64 = NULL;
75                regs32 = saved_state32(regs);
76                if (regs32->cs & 0x03) user_mode = TRUE;
77                trapno = regs32->trapno;
78        }
79
80	lck_rw_t *rwp;
81	struct proc *p = current_proc();
82
83	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
84	if (user_mode /*|| (rp->r_ps & PS_VM)*/) {
85		/*
86		 * DTrace accesses t_cred in probe context.  t_cred
87		 * must always be either NULL, or point to a valid,
88		 * allocated cred structure.
89		 */
90		kauth_cred_uthread_update(uthread, p);
91	}
92
93	if (trapno == T_DTRACE_RET) {
94		uint8_t step = uthread->t_dtrace_step;
95		uint8_t ret = uthread->t_dtrace_ret;
96		user_addr_t npc = uthread->t_dtrace_npc;
97
98		if (uthread->t_dtrace_ast) {
99			printf("dtrace_user_probe() should be calling aston()\n");
100			// aston(uthread);
101			// uthread->t_sig_check = 1;
102		}
103
104		/*
105		 * Clear all user tracing flags.
106		 */
107		uthread->t_dtrace_ft = 0;
108
109		/*
110		 * If we weren't expecting to take a return probe trap, kill
111		 * the process as though it had just executed an unassigned
112		 * trap instruction.
113		 */
114		if (step == 0) {
115			/*
116			 * APPLE NOTE: We're returning KERN_FAILURE, which causes
117			 * the generic signal handling code to take over, which will effectively
118			 * deliver a EXC_BAD_INSTRUCTION to the user process.
119			 */
120	 		return KERN_FAILURE;
121		}
122
123		/*
124		 * If we hit this trap unrelated to a return probe, we're
125		 * just here to reset the AST flag since we deferred a signal
126		 * until after we logically single-stepped the instruction we
127		 * copied out.
128		 */
129		if (ret == 0) {
130			if (regs64) {
131				regs64->isf.rip = npc;
132			} else {
133				regs32->eip = npc;
134			}
135			return KERN_SUCCESS;
136		}
137
138		/*
139		 * We need to wait until after we've called the
140		 * dtrace_return_probe_ptr function pointer to set %pc.
141		 */
142		rwp = &CPU->cpu_ft_lock;
143		lck_rw_lock_shared(rwp);
144
145		if (dtrace_return_probe_ptr != NULL)
146			(void) (*dtrace_return_probe_ptr)(regs);
147		lck_rw_unlock_shared(rwp);
148
149		if (regs64) {
150			regs64->isf.rip = npc;
151		} else {
152			regs32->eip = npc;
153		}
154
155		return KERN_SUCCESS;
156	} else if (trapno == T_INT3) {
157		uint8_t instr, instr2;
158		rwp = &CPU->cpu_ft_lock;
159
160		/*
161		 * The DTrace fasttrap provider uses the breakpoint trap
162		 * (int 3). We let DTrace take the first crack at handling
163		 * this trap; if it's not a probe that DTrace knowns about,
164		 * we call into the trap() routine to handle it like a
165		 * breakpoint placed by a conventional debugger.
166		 */
167
168		/*
169		 * APPLE NOTE: I believe the purpose of the reader/writers lock
170		 * is thus: There are times which dtrace needs to prevent calling
171		 * dtrace_pid_probe_ptr(). Sun's original impl grabbed a plain
172		 * mutex here. However, that serialized all probe calls, and
173		 * destroyed MP behavior. So now they use a RW lock, with probes
174		 * as readers, and the top level synchronization as a writer.
175		 */
176		lck_rw_lock_shared(rwp);
177		if (dtrace_pid_probe_ptr != NULL &&
178		    (*dtrace_pid_probe_ptr)(regs) == 0) {
179			lck_rw_unlock_shared(rwp);
180			return KERN_SUCCESS;
181		}
182		lck_rw_unlock_shared(rwp);
183
184
185		/*
186		 * If the instruction that caused the breakpoint trap doesn't
187		 * look like an int 3 anymore, it may be that this tracepoint
188		 * was removed just after the user thread executed it. In
189		 * that case, return to user land to retry the instuction.
190		 */
191		user_addr_t pc = (regs64) ? regs64->isf.rip : (user_addr_t)regs32->eip;
192		if (fuword8(pc - 1, &instr) == 0 && instr != FASTTRAP_INSTR && // neither single-byte INT3 (0xCC)
193			!(instr == 3 && fuword8(pc - 2, &instr2) == 0 && instr2 == 0xCD)) { // nor two-byte INT 3 (0xCD03)
194			if (regs64) {
195				regs64->isf.rip--;
196			} else {
197				regs32->eip--;
198			}
199			return KERN_SUCCESS;
200		}
201
202	}
203
204	return KERN_FAILURE;
205}
206
/*
 * dtrace_safe_synchronous_signal
 *
 * Currently a no-op: the whole body is compiled out with `#if 0`, so the
 * function returns without touching any state.
 *
 * The disabled code is kept as a reference. It looks at the current
 * thread's saved pc and either clears t_dtrace_ft (pc outside the
 * scratch range) or also rewinds the pc to t_dtrace_pc (pc exactly at
 * t_dtrace_scrpc or t_dtrace_astpc).
 * NOTE(review): kthread_t/curthread/lwptoregs look like interfaces from
 * the original Sun implementation, which is presumably why this has not
 * been enabled here -- confirm before porting.
 */
void
dtrace_safe_synchronous_signal(void)
{
#if 0
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	/* Length of the traced instruction: next pc minus original pc. */
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags. If the instruction
	 * we copied out caused a synchronous trap, reset the pc back to its
	 * original value and turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
			rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
	} else if (rp->r_pc == t->t_dtrace_scrpc ||
			rp->r_pc == t->t_dtrace_astpc) {
		rp->r_pc = t->t_dtrace_pc;
		t->t_dtrace_ft = 0;
	}
#endif /* 0 */
}
233
/*
 * dtrace_safe_defer_signal
 *
 * Currently always returns 0: everything except the final `return 0` is
 * compiled out with `#if 0`.
 *
 * The disabled code is kept as a reference. It returns 0 when the signal
 * can be taken right away (clearing t_dtrace_ft, and in one case first
 * moving the pc to t_dtrace_npc), and returns 1 after setting
 * t_dtrace_ast when the signal has to be deferred until the copied-out
 * instruction has executed.
 * NOTE(review): kthread_t/curthread/lwptoregs look like interfaces from
 * the original Sun implementation, which is presumably why this has not
 * been enabled here -- confirm before porting.
 */
int
dtrace_safe_defer_signal(void)
{
#if 0
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	/* Length of the traced instruction: next pc minus original pc. */
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
			rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * If we've executed the original instruction, but haven't performed
	 * the jmp back to t->t_dtrace_npc or the clean up of any registers
	 * used to emulate %rip-relative instructions in 64-bit mode, do that
	 * here and take the signal right away. We detect this condition by
	 * seeing if the program counter is in the range [scrpc + isz, astpc).
	 */
	if (t->t_dtrace_astpc - rp->r_pc <
			t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) {
#ifdef __sol64
		/*
		 * If there is a scratch register and we're on the
		 * instruction immediately after the modified instruction,
		 * restore the value of that scratch register.
		 */
		if (t->t_dtrace_reg != 0 &&
				rp->r_pc == t->t_dtrace_scrpc + isz) {
			switch (t->t_dtrace_reg) {
				case REG_RAX:
					rp->r_rax = t->t_dtrace_regv;
					break;
				case REG_RCX:
					rp->r_rcx = t->t_dtrace_regv;
					break;
				case REG_R8:
					rp->r_r8 = t->t_dtrace_regv;
					break;
				case REG_R9:
					rp->r_r9 = t->t_dtrace_regv;
					break;
			}
		}
#endif
		rp->r_pc = t->t_dtrace_npc;
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * Otherwise, make sure we'll return to the kernel after executing
	 * the copied out instruction and defer the signal.
	 */
	if (!t->t_dtrace_step) {
		ASSERT(rp->r_pc < t->t_dtrace_astpc);
		rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;
		t->t_dtrace_step = 1;
	}

	t->t_dtrace_ast = 1;

	return (1);

#endif /* 0 */

	/* Live path: nothing is ever deferred in this build. */
	return 0;
}
310