1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 *
21 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22 *
23 */
24
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#include <sys/param.h>
31
32#include <sys/dtrace.h>
33
34#include <machine/cpufunc.h>
35#include <machine/md_var.h>
36
37#include "fbt.h"
38
/*
 * Instruction bytes that the probe-site scanner in this file compares
 * against function text.  Mnemonics in the comments are the usual x86
 * names for these encodings.
 */
#define	FBT_PUSHL_EBP		0x55	/* push %ebp (push %rbp on amd64) */
#define	FBT_MOVL_ESP_EBP0_V0	0x8b	/* mov %esp,%ebp, form 0, 1st byte */
#define	FBT_MOVL_ESP_EBP1_V0	0xec	/* mov %esp,%ebp, form 0, 2nd byte */
#define	FBT_MOVL_ESP_EBP0_V1	0x89	/* mov %esp,%ebp, form 1, 1st byte */
#define	FBT_MOVL_ESP_EBP1_V1	0xe5	/* mov %esp,%ebp, form 1, 2nd byte */
#define	FBT_REX_RSP_RBP		0x48	/* REX.W prefix, as in mov %rsp,%rbp */

/* Bytes matched when looking for function return sites. */
#define	FBT_POPL_EBP		0x5d	/* pop %ebp */
#define	FBT_RET			0xc3	/* ret */
#define	FBT_RET_IMM16		0xc2	/* ret imm16 */
#define	FBT_LEAVE		0xc9	/* leave */

/*
 * Byte stored in fbtp_patchval for every probe created here, i.e. the
 * value that replaces the first byte of an instrumented instruction.
 * 0xcc is the one-byte breakpoint (int3); 0xf0 is the LOCK prefix.
 * NOTE(review): presumably the LOCK prefix is used on i386 because it
 * makes the patched instruction raise an invalid-opcode trap -- confirm
 * against the i386 trap handler.
 */
#ifdef __amd64__
#define	FBT_PATCHVAL		0xcc
#else
#define	FBT_PATCHVAL		0xf0
#endif

/*
 * Passed as the "aframes" argument of dtrace_probe_create() for both
 * entry and return probes.  Presumably the number of artificial stack
 * frames DTrace should skip -- confirm against dtrace_probe_create().
 */
#define FBT_AFRAMES 2
58
59int
60fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch __unused)
61{
62	solaris_cpu_t *cpu;
63	uintptr_t *stack;
64	uintptr_t arg0, arg1, arg2, arg3, arg4, rval;
65	fbt_probe_t *fbt;
66	int8_t fbtrval;
67
68#ifdef __amd64__
69	stack = (uintptr_t *)frame->tf_rsp;
70	rval = frame->tf_rax;
71#else
72	/* Skip hardware-saved registers. */
73	stack = (uintptr_t *)frame->tf_isp + 3;
74	rval = frame->tf_eax;
75#endif
76
77	cpu = &solaris_cpu[curcpu];
78	fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
79	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
80		if ((uintptr_t)fbt->fbtp_patchpoint != addr)
81			continue;
82		fbtrval = fbt->fbtp_rval;
83
84		/*
85		 * Report the address of the breakpoint for the benefit
86		 * of consumers fetching register values with regs[].
87		 */
88#ifdef __i386__
89		frame->tf_eip--;
90#else
91		frame->tf_rip--;
92#endif
93		for (; fbt != NULL; fbt = fbt->fbtp_tracenext) {
94			ASSERT(fbt->fbtp_rval == fbtrval);
95			if (fbt->fbtp_roffset == 0) {
96#ifdef __amd64__
97				/* fbt->fbtp_rval == DTRACE_INVOP_PUSHQ_RBP */
98				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
99				cpu->cpu_dtrace_caller = stack[0];
100				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
101				    CPU_DTRACE_BADADDR);
102
103				arg0 = frame->tf_rdi;
104				arg1 = frame->tf_rsi;
105				arg2 = frame->tf_rdx;
106				arg3 = frame->tf_rcx;
107				arg4 = frame->tf_r8;
108#else
109				int i = 0;
110
111				/*
112				 * When accessing the arguments on the stack,
113				 * we must protect against accessing beyond
114				 * the stack.  We can safely set NOFAULT here
115				 * -- we know that interrupts are already
116				 * disabled.
117				 */
118				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
119				cpu->cpu_dtrace_caller = stack[i++];
120				arg0 = stack[i++];
121				arg1 = stack[i++];
122				arg2 = stack[i++];
123				arg3 = stack[i++];
124				arg4 = stack[i++];
125				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
126				    CPU_DTRACE_BADADDR);
127#endif
128
129				dtrace_probe(fbt->fbtp_id, arg0, arg1,
130				    arg2, arg3, arg4);
131
132				cpu->cpu_dtrace_caller = 0;
133			} else {
134#ifdef __amd64__
135				/*
136				 * On amd64, we instrument the ret, not the
137				 * leave.  We therefore need to set the caller
138				 * to ensure that the top frame of a stack()
139				 * action is correct.
140				 */
141				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
142				cpu->cpu_dtrace_caller = stack[0];
143				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
144				    CPU_DTRACE_BADADDR);
145#endif
146
147				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
148				    rval, 0, 0, 0);
149				cpu->cpu_dtrace_caller = 0;
150			}
151		}
152		/* Advance to the instruction following the breakpoint. */
153#ifdef __i386__
154		frame->tf_eip++;
155#else
156		frame->tf_rip++;
157#endif
158		return (fbtrval);
159	}
160
161	return (0);
162}
163
164void
165fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val)
166{
167	register_t intr;
168	bool old_wp;
169
170	intr = intr_disable();
171	old_wp = disable_wp();
172	*fbt->fbtp_patchpoint = val;
173	restore_wp(old_wp);
174	intr_restore(intr);
175}
176
177int
178fbt_provide_module_function(linker_file_t lf, int symindx,
179    linker_symval_t *symval, void *opaque)
180{
181	char *modname = opaque;
182	const char *name = symval->name;
183	fbt_probe_t *fbt, *hash, *retfbt;
184	int j;
185	int size;
186	uint8_t *instr, *limit;
187
188	if (fbt_excluded(name))
189		return (0);
190
191	/*
192	 * trap_check() is a wrapper for DTrace's fault handler, so we don't
193	 * want to be able to instrument it.
194	 */
195	if (strcmp(name, "trap_check") == 0)
196		return (0);
197
198	size = symval->size;
199
200	instr = (uint8_t *) symval->value;
201	limit = (uint8_t *) symval->value + symval->size;
202
203#ifdef __amd64__
204	while (instr < limit) {
205		if (*instr == FBT_PUSHL_EBP)
206			break;
207
208		if ((size = dtrace_instr_size(instr)) <= 0)
209			break;
210
211		instr += size;
212	}
213
214	if (instr >= limit || *instr != FBT_PUSHL_EBP) {
215		/*
216		 * We either don't save the frame pointer in this
217		 * function, or we ran into some disassembly
218		 * screw-up.  Either way, we bail.
219		 */
220		return (0);
221	}
222#else
223	if (instr[0] != FBT_PUSHL_EBP)
224		return (0);
225
226	if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 &&
227	    instr[2] == FBT_MOVL_ESP_EBP1_V0) &&
228	    !(instr[1] == FBT_MOVL_ESP_EBP0_V1 &&
229	    instr[2] == FBT_MOVL_ESP_EBP1_V1))
230		return (0);
231#endif
232
233	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
234	fbt->fbtp_name = name;
235	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
236	    name, FBT_ENTRY, FBT_AFRAMES, fbt);
237	fbt->fbtp_patchpoint = instr;
238	fbt->fbtp_ctl = lf;
239	fbt->fbtp_loadcnt = lf->loadcnt;
240	fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP;
241	fbt->fbtp_savedval = *instr;
242	fbt->fbtp_patchval = FBT_PATCHVAL;
243	fbt->fbtp_symindx = symindx;
244
245	for (hash = fbt_probetab[FBT_ADDR2NDX(instr)]; hash != NULL;
246	    hash = hash->fbtp_hashnext) {
247		if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
248			fbt->fbtp_tracenext = hash->fbtp_tracenext;
249			hash->fbtp_tracenext = fbt;
250			break;
251		}
252	}
253	if (hash == NULL) {
254		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
255		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
256	}
257
258	lf->fbt_nentries++;
259
260	retfbt = NULL;
261again:
262	if (instr >= limit)
263		return (0);
264
265	/*
266	 * If this disassembly fails, then we've likely walked off into
267	 * a jump table or some other unsuitable area.  Bail out of the
268	 * disassembly now.
269	 */
270	if ((size = dtrace_instr_size(instr)) <= 0)
271		return (0);
272
273#ifdef __amd64__
274	/*
275	 * We only instrument "ret" on amd64 -- we don't yet instrument
276	 * ret imm16, largely because the compiler doesn't seem to
277	 * (yet) emit them in the kernel...
278	 */
279	if (*instr != FBT_RET) {
280		instr += size;
281		goto again;
282	}
283#else
284	if (!(size == 1 &&
285	    (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) &&
286	    (*(instr + 1) == FBT_RET ||
287	    *(instr + 1) == FBT_RET_IMM16))) {
288		instr += size;
289		goto again;
290	}
291#endif
292
293	/*
294	 * We (desperately) want to avoid erroneously instrumenting a
295	 * jump table, especially given that our markers are pretty
296	 * short:  two bytes on x86, and just one byte on amd64.  To
297	 * determine if we're looking at a true instruction sequence
298	 * or an inline jump table that happens to contain the same
299	 * byte sequences, we resort to some heuristic sleeze:  we
300	 * treat this instruction as being contained within a pointer,
301	 * and see if that pointer points to within the body of the
302	 * function.  If it does, we refuse to instrument it.
303	 */
304	for (j = 0; j < sizeof (uintptr_t); j++) {
305		caddr_t check = (caddr_t) instr - j;
306		uint8_t *ptr;
307
308		if (check < symval->value)
309			break;
310
311		if (check + sizeof (caddr_t) > (caddr_t)limit)
312			continue;
313
314		ptr = *(uint8_t **)check;
315
316		if (ptr >= (uint8_t *) symval->value && ptr < limit) {
317			instr += size;
318			goto again;
319		}
320	}
321
322	/*
323	 * We have a winner!
324	 */
325	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
326	fbt->fbtp_name = name;
327
328	if (retfbt == NULL) {
329		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
330		    name, FBT_RETURN, FBT_AFRAMES, fbt);
331	} else {
332		retfbt->fbtp_probenext = fbt;
333		fbt->fbtp_id = retfbt->fbtp_id;
334	}
335
336	retfbt = fbt;
337	fbt->fbtp_patchpoint = instr;
338	fbt->fbtp_ctl = lf;
339	fbt->fbtp_loadcnt = lf->loadcnt;
340	fbt->fbtp_symindx = symindx;
341
342#ifndef __amd64__
343	if (*instr == FBT_POPL_EBP) {
344		fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
345	} else {
346		ASSERT(*instr == FBT_LEAVE);
347		fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
348	}
349	fbt->fbtp_roffset =
350	    (uintptr_t)(instr - (uint8_t *) symval->value) + 1;
351
352#else
353	ASSERT(*instr == FBT_RET);
354	fbt->fbtp_rval = DTRACE_INVOP_RET;
355	fbt->fbtp_roffset =
356	    (uintptr_t)(instr - (uint8_t *) symval->value);
357#endif
358
359	fbt->fbtp_savedval = *instr;
360	fbt->fbtp_patchval = FBT_PATCHVAL;
361	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
362	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
363
364	lf->fbt_nentries++;
365
366	instr += size;
367	goto again;
368}
369