1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 *
21 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22 *
23 * $FreeBSD$
24 *
25 */
26
27/*
28 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29 * Use is subject to license terms.
30 */
31
32#include <sys/cdefs.h>
33#include <sys/param.h>
34
35#include <sys/dtrace.h>
36
37#include <machine/cpufunc.h>
38#include <machine/md_var.h>
39
40#include "fbt.h"
41
/*
 * Opcode bytes used to recognise a frame-pointer prologue:
 * "push %ebp" followed by either of the two encodings (V0/V1) of
 * "mov %esp,%ebp".  FBT_REX_RSP_RBP is not referenced by the code
 * in this file; NOTE(review): presumably the REX prefix of the
 * 64-bit form of the move -- confirm before relying on it.
 */
#define	FBT_PUSHL_EBP		0x55
#define	FBT_MOVL_ESP_EBP0_V0	0x8b
#define	FBT_MOVL_ESP_EBP1_V0	0xec
#define	FBT_MOVL_ESP_EBP0_V1	0x89
#define	FBT_MOVL_ESP_EBP1_V1	0xe5
#define	FBT_REX_RSP_RBP		0x48

/*
 * Opcode bytes used to recognise a function epilogue: "pop %ebp" or
 * "leave", and the plain and imm16 forms of "ret".
 */
#define	FBT_POPL_EBP		0x5d
#define	FBT_RET			0xc3
#define	FBT_RET_IMM16		0xc2
#define	FBT_LEAVE		0xc9

/*
 * Byte recorded in fbtp_patchval for every probe created in this file.
 * NOTE(review): 0xcc is the one-byte breakpoint (int3) opcode and 0xf0
 * is the lock prefix; presumably each is chosen so that executing the
 * patched instruction traps into the handler -- confirm against the
 * trap code.
 */
#ifdef __amd64__
#define	FBT_PATCHVAL		0xcc
#else
#define	FBT_PATCHVAL		0xf0
#endif

/* Probe names passed to dtrace_probe_create() for the two probe kinds. */
#define	FBT_ENTRY	"entry"
#define	FBT_RETURN	"return"
62
63int
64fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval)
65{
66	solaris_cpu_t *cpu;
67	uintptr_t *stack;
68	uintptr_t arg0, arg1, arg2, arg3, arg4;
69	fbt_probe_t *fbt;
70	int8_t fbtrval;
71
72#ifdef __amd64__
73	stack = (uintptr_t *)frame->tf_rsp;
74#else
75	/* Skip hardware-saved registers. */
76	stack = (uintptr_t *)frame->tf_isp + 3;
77#endif
78
79	cpu = &solaris_cpu[curcpu];
80	fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
81	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
82		if ((uintptr_t)fbt->fbtp_patchpoint != addr)
83			continue;
84		fbtrval = fbt->fbtp_rval;
85		for (; fbt != NULL; fbt = fbt->fbtp_tracenext) {
86			ASSERT(fbt->fbtp_rval == fbtrval);
87			if (fbt->fbtp_roffset == 0) {
88#ifdef __amd64__
89				/* fbt->fbtp_rval == DTRACE_INVOP_PUSHQ_RBP */
90				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
91				cpu->cpu_dtrace_caller = stack[0];
92				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
93				    CPU_DTRACE_BADADDR);
94
95				arg0 = frame->tf_rdi;
96				arg1 = frame->tf_rsi;
97				arg2 = frame->tf_rdx;
98				arg3 = frame->tf_rcx;
99				arg4 = frame->tf_r8;
100#else
101				int i = 0;
102
103				/*
104				 * When accessing the arguments on the stack,
105				 * we must protect against accessing beyond
106				 * the stack.  We can safely set NOFAULT here
107				 * -- we know that interrupts are already
108				 * disabled.
109				 */
110				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
111				cpu->cpu_dtrace_caller = stack[i++];
112				arg0 = stack[i++];
113				arg1 = stack[i++];
114				arg2 = stack[i++];
115				arg3 = stack[i++];
116				arg4 = stack[i++];
117				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
118				    CPU_DTRACE_BADADDR);
119#endif
120
121				dtrace_probe(fbt->fbtp_id, arg0, arg1,
122				    arg2, arg3, arg4);
123
124				cpu->cpu_dtrace_caller = 0;
125			} else {
126#ifdef __amd64__
127				/*
128				 * On amd64, we instrument the ret, not the
129				 * leave.  We therefore need to set the caller
130				 * to ensure that the top frame of a stack()
131				 * action is correct.
132				 */
133				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
134				cpu->cpu_dtrace_caller = stack[0];
135				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
136				    CPU_DTRACE_BADADDR);
137#endif
138
139				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
140				    rval, 0, 0, 0);
141				cpu->cpu_dtrace_caller = 0;
142			}
143		}
144		return (fbtrval);
145	}
146
147	return (0);
148}
149
150void
151fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val)
152{
153	register_t intr;
154	bool old_wp;
155
156	intr = intr_disable();
157	old_wp = disable_wp();
158	*fbt->fbtp_patchpoint = val;
159	restore_wp(old_wp);
160	intr_restore(intr);
161}
162
163int
164fbt_provide_module_function(linker_file_t lf, int symindx,
165    linker_symval_t *symval, void *opaque)
166{
167	char *modname = opaque;
168	const char *name = symval->name;
169	fbt_probe_t *fbt, *hash, *retfbt;
170	int j;
171	int size;
172	uint8_t *instr, *limit;
173
174	if (fbt_excluded(name))
175		return (0);
176
177	/*
178	 * trap_check() is a wrapper for DTrace's fault handler, so we don't
179	 * want to be able to instrument it.
180	 */
181	if (strcmp(name, "trap_check") == 0)
182		return (0);
183
184	size = symval->size;
185
186	instr = (uint8_t *) symval->value;
187	limit = (uint8_t *) symval->value + symval->size;
188
189#ifdef __amd64__
190	while (instr < limit) {
191		if (*instr == FBT_PUSHL_EBP)
192			break;
193
194		if ((size = dtrace_instr_size(instr)) <= 0)
195			break;
196
197		instr += size;
198	}
199
200	if (instr >= limit || *instr != FBT_PUSHL_EBP) {
201		/*
202		 * We either don't save the frame pointer in this
203		 * function, or we ran into some disassembly
204		 * screw-up.  Either way, we bail.
205		 */
206		return (0);
207	}
208#else
209	if (instr[0] != FBT_PUSHL_EBP)
210		return (0);
211
212	if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 &&
213	    instr[2] == FBT_MOVL_ESP_EBP1_V0) &&
214	    !(instr[1] == FBT_MOVL_ESP_EBP0_V1 &&
215	    instr[2] == FBT_MOVL_ESP_EBP1_V1))
216		return (0);
217#endif
218
219	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
220	fbt->fbtp_name = name;
221	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
222	    name, FBT_ENTRY, 3, fbt);
223	fbt->fbtp_patchpoint = instr;
224	fbt->fbtp_ctl = lf;
225	fbt->fbtp_loadcnt = lf->loadcnt;
226	fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP;
227	fbt->fbtp_savedval = *instr;
228	fbt->fbtp_patchval = FBT_PATCHVAL;
229	fbt->fbtp_symindx = symindx;
230
231	for (hash = fbt_probetab[FBT_ADDR2NDX(instr)]; hash != NULL;
232	    hash = hash->fbtp_hashnext) {
233		if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
234			fbt->fbtp_tracenext = hash->fbtp_tracenext;
235			hash->fbtp_tracenext = fbt;
236			break;
237		}
238	}
239	if (hash == NULL) {
240		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
241		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
242	}
243
244	lf->fbt_nentries++;
245
246	retfbt = NULL;
247again:
248	if (instr >= limit)
249		return (0);
250
251	/*
252	 * If this disassembly fails, then we've likely walked off into
253	 * a jump table or some other unsuitable area.  Bail out of the
254	 * disassembly now.
255	 */
256	if ((size = dtrace_instr_size(instr)) <= 0)
257		return (0);
258
259#ifdef __amd64__
260	/*
261	 * We only instrument "ret" on amd64 -- we don't yet instrument
262	 * ret imm16, largely because the compiler doesn't seem to
263	 * (yet) emit them in the kernel...
264	 */
265	if (*instr != FBT_RET) {
266		instr += size;
267		goto again;
268	}
269#else
270	if (!(size == 1 &&
271	    (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) &&
272	    (*(instr + 1) == FBT_RET ||
273	    *(instr + 1) == FBT_RET_IMM16))) {
274		instr += size;
275		goto again;
276	}
277#endif
278
279	/*
280	 * We (desperately) want to avoid erroneously instrumenting a
281	 * jump table, especially given that our markers are pretty
282	 * short:  two bytes on x86, and just one byte on amd64.  To
283	 * determine if we're looking at a true instruction sequence
284	 * or an inline jump table that happens to contain the same
285	 * byte sequences, we resort to some heuristic sleeze:  we
286	 * treat this instruction as being contained within a pointer,
287	 * and see if that pointer points to within the body of the
288	 * function.  If it does, we refuse to instrument it.
289	 */
290	for (j = 0; j < sizeof (uintptr_t); j++) {
291		caddr_t check = (caddr_t) instr - j;
292		uint8_t *ptr;
293
294		if (check < symval->value)
295			break;
296
297		if (check + sizeof (caddr_t) > (caddr_t)limit)
298			continue;
299
300		ptr = *(uint8_t **)check;
301
302		if (ptr >= (uint8_t *) symval->value && ptr < limit) {
303			instr += size;
304			goto again;
305		}
306	}
307
308	/*
309	 * We have a winner!
310	 */
311	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
312	fbt->fbtp_name = name;
313
314	if (retfbt == NULL) {
315		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
316		    name, FBT_RETURN, 3, fbt);
317	} else {
318		retfbt->fbtp_probenext = fbt;
319		fbt->fbtp_id = retfbt->fbtp_id;
320	}
321
322	retfbt = fbt;
323	fbt->fbtp_patchpoint = instr;
324	fbt->fbtp_ctl = lf;
325	fbt->fbtp_loadcnt = lf->loadcnt;
326	fbt->fbtp_symindx = symindx;
327
328#ifndef __amd64__
329	if (*instr == FBT_POPL_EBP) {
330		fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
331	} else {
332		ASSERT(*instr == FBT_LEAVE);
333		fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
334	}
335	fbt->fbtp_roffset =
336	    (uintptr_t)(instr - (uint8_t *) symval->value) + 1;
337
338#else
339	ASSERT(*instr == FBT_RET);
340	fbt->fbtp_rval = DTRACE_INVOP_RET;
341	fbt->fbtp_roffset =
342	    (uintptr_t)(instr - (uint8_t *) symval->value);
343#endif
344
345	fbt->fbtp_savedval = *instr;
346	fbt->fbtp_patchval = FBT_PATCHVAL;
347	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
348	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
349
350	lf->fbt_nentries++;
351
352	instr += size;
353	goto again;
354}
355