1/*	$NetBSD: fbt_isa.c,v 1.6 2021/08/18 11:26:17 riastradh Exp $	*/
2
3/*
4 * CDDL HEADER START
5 *
6 * The contents of this file are subject to the terms of the
7 * Common Development and Distribution License (the "License").
8 * You may not use this file except in compliance with the License.
9 *
10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11 * or http://www.opensolaris.org/os/licensing.
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
14 *
15 * When distributing Covered Code, include this CDDL HEADER in each
16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17 * If applicable, add the following below this CDDL HEADER, with the
18 * fields enclosed by brackets "[]" replaced with your own identifying
19 * information: Portions Copyright [yyyy] [name of copyright owner]
20 *
21 * CDDL HEADER END
22 *
23 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
24 *
25 * $FreeBSD: head/sys/cddl/dev/fbt/x86/fbt_isa.c 309785 2016-12-10 03:11:05Z markj $
26 *
27 */
28
29/*
30 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
31 * Use is subject to license terms.
32 */
33
34#include <sys/cdefs.h>
35#include <sys/proc.h>
36#include <sys/param.h>
37#include <sys/cpu.h>
38#include <sys/module.h>
39#include <sys/kmem.h>
40
41#include <sys/dtrace.h>
42
43#if 1
44#include <machine/cpufunc.h>
45#include <machine/specialreg.h>
46#if 0
47#include <x86/cpuvar.h>
48#endif
49#include <x86/cputypes.h>
50#endif
51
52#include "fbt.h"
53
/*
 * Opcode bytes matched when looking for a function's frame-pointer
 * prologue: the push of the frame pointer, followed (checked on i386
 * only) by one of the two encodings of "mov %esp,%ebp".
 */
#define	FBT_PUSHL_EBP		0x55	/* push %ebp / push %rbp */
#define	FBT_MOVL_ESP_EBP0_V0	0x8b	/* mov %esp,%ebp, encoding 0, byte 0 */
#define	FBT_MOVL_ESP_EBP1_V0	0xec	/* mov %esp,%ebp, encoding 0, byte 1 */
#define	FBT_MOVL_ESP_EBP0_V1	0x89	/* mov %esp,%ebp, encoding 1, byte 0 */
#define	FBT_MOVL_ESP_EBP1_V1	0xe5	/* mov %esp,%ebp, encoding 1, byte 1 */
/* NOTE(review): presumably the REX.W prefix of "mov %rsp,%rbp"; not used below. */
#define	FBT_REX_RSP_RBP		0x48

/* Opcode bytes matched when looking for function return sites. */
#define	FBT_POPL_EBP		0x5d	/* pop %ebp */
#define	FBT_RET			0xc3	/* ret */
#define	FBT_RET_IMM16		0xc2	/* ret imm16 */
#define	FBT_LEAVE		0xc9	/* leave */

/*
 * Byte recorded in fbtp_patchval for every probe, i.e. the value meant to
 * replace the original instruction byte at the patch point: 0xcc (int3)
 * on amd64, 0xf0 (the lock prefix byte) on i386.
 */
#ifdef __amd64__
#define	FBT_PATCHVAL		0xcc
#else
#define	FBT_PATCHVAL		0xf0
#endif

/* Probe names passed to dtrace_probe_create(). */
#define	FBT_ENTRY	"entry"
#define	FBT_RETURN	"return"
74
75int
76fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval)
77{
78	solaris_cpu_t *cpu;
79	uintptr_t *stack;
80	uintptr_t arg0, arg1, arg2, arg3, arg4;
81	fbt_probe_t *fbt;
82
83#ifdef __amd64__
84	stack = (uintptr_t *)frame->tf_rsp;
85#else
86	/* Skip hardware-saved registers. */
87#ifdef __NetBSD__
88	stack = (uintptr_t *)&frame->tf_esp;
89#else
90	stack = (uintptr_t *)frame->tf_isp + 3;
91#endif
92#endif
93
94	cpu = &solaris_cpu[cpu_number()];
95	fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
96	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
97		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
98			if (fbt->fbtp_roffset == 0) {
99#ifdef __amd64__
100				/* fbt->fbtp_rval == DTRACE_INVOP_PUSHQ_RBP */
101				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
102				cpu->cpu_dtrace_caller = stack[0];
103				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
104				    CPU_DTRACE_BADADDR);
105
106				arg0 = frame->tf_rdi;
107				arg1 = frame->tf_rsi;
108				arg2 = frame->tf_rdx;
109				arg3 = frame->tf_rcx;
110				arg4 = frame->tf_r8;
111#else
112				int i = 0;
113
114				/*
115				 * When accessing the arguments on the stack,
116				 * we must protect against accessing beyond
117				 * the stack.  We can safely set NOFAULT here
118				 * -- we know that interrupts are already
119				 * disabled.
120				 */
121				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
122				cpu->cpu_dtrace_caller = stack[i++];
123				arg0 = stack[i++];
124				arg1 = stack[i++];
125				arg2 = stack[i++];
126				arg3 = stack[i++];
127				arg4 = stack[i++];
128				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
129				    CPU_DTRACE_BADADDR);
130#endif
131
132				dtrace_probe(fbt->fbtp_id, arg0, arg1,
133				    arg2, arg3, arg4);
134
135				cpu->cpu_dtrace_caller = 0;
136			} else {
137#ifdef __amd64__
138				/*
139				 * On amd64, we instrument the ret, not the
140				 * leave.  We therefore need to set the caller
141				 * to ensure that the top frame of a stack()
142				 * action is correct.
143				 */
144				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
145				cpu->cpu_dtrace_caller = stack[0];
146				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
147				    CPU_DTRACE_BADADDR);
148#endif
149
150				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
151				    rval, 0, 0, 0);
152				cpu->cpu_dtrace_caller = 0;
153			}
154
155			return (fbt->fbtp_rval);
156		}
157	}
158
159	return (0);
160}
161
162
163
164
#ifdef __FreeBSD__
/*
 * Store `val' at the patch point of the given probe.
 */
void
fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val)
{

	fbt->fbtp_patchpoint[0] = val;
}
#endif
173
#ifdef __NetBSD__
/*
 * Store `val' at the patch point of `fbt' and of every probe linked
 * after it through fbtp_next.  The stores are done with interrupts
 * disabled and with CR0.WP cleared; both are put back before returning.
 *
 * XXX XXX XXX This is absolutely unsafe: the very existence of this
 * function is a problem because it is far too easy to use as a ROP
 * gadget.  It is only built as a module and never into the kernel
 * proper, so we are fine "by default".
 * XXX Add a #warning if it gets compiled in the kernel?
 */
void
fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val)
{
	u_long saved_psl, saved_cr0;
	fbt_probe_t *cur;

	/* Remember the current PSL, then turn interrupts off. */
	saved_psl = x86_read_psl();
	x86_disable_intr();

	/* Lift supervisor-mode write protection. */
	saved_cr0 = rcr0();
	lcr0(saved_cr0 & ~CR0_WP);

	/* XXX XXX XXX Shouldn't rely on caller-provided dst! */
	/* XXX XXX XXX Shouldn't rely on caller-provided val! */
	cur = fbt;
	while (cur != NULL) {
		*cur->fbtp_patchpoint = val;
		cur = cur->fbtp_next;
	}

	/* Write back and invalidate cache, flush pipelines. */
	wbinvd();
	x86_flush();

	/* Put write protection back. */
	lcr0(saved_cr0);

	/* Restore the PSL, which may turn interrupts back on. */
	x86_write_psl(saved_psl);
}
#endif
211
212
213#ifdef __FreeBSD__
214int
215fbt_provide_module_function(linker_file_t lf, int symindx,
216    linker_symval_t *symval, void *opaque)
217#endif
218#ifdef __NetBSD__
219int
220fbt_provide_module_cb(const char *name, int symindx, void *value,
221    uint32_t symsize, int type, void *opaque)
222#endif
223{
224	fbt_probe_t *fbt, *retfbt;
225	u_int8_t *instr, *limit;
226	int j;
227	int size;
228
229#ifdef __FreeBSD_
230	char *modname = opaque;
231	const char *name = symval->name;
232	size_t symsize = symval->size;
233	void *value = symval->value;
234
235	/*
236	 * trap_check() is a wrapper for DTrace's fault handler, so we don't
237	 * want to be able to instrument it.
238	 */
239	if (strcmp(name, "trap_check") == 0)
240		return (0);
241#endif
242#ifdef __NetBSD__
243	struct fbt_ksyms_arg *fka = opaque;
244	modctl_t *mod = fka->fka_mod;
245	const char *modname = module_name(mod);
246
247	/* got a function? */
248	if (ELF_ST_TYPE(type) != STT_FUNC)
249		return 0;
250
251	/*
252	 * Exclude some more symbols which can be called from probe context.
253	 */
254	if (strcmp(name, "trap") == 0 ||
255	    strcmp(name, "x86_curcpu") == 0 ||
256	    strcmp(name, "x86_curlwp") == 0) {
257		return 0;
258	}
259#endif
260
261	if (fbt_excluded(name))
262		return (0);
263
264	instr = (u_int8_t *) value;
265	limit = (u_int8_t *) value + symsize;
266
267#ifdef __amd64__
268	while (instr < limit) {
269		if (*instr == FBT_PUSHL_EBP)
270			break;
271
272		if ((size = dtrace_instr_size(instr)) <= 0)
273			break;
274
275		instr += size;
276	}
277
278	if (instr >= limit || *instr != FBT_PUSHL_EBP) {
279		/*
280		 * We either don't save the frame pointer in this
281		 * function, or we ran into some disassembly
282		 * screw-up.  Either way, we bail.
283		 */
284		return (0);
285	}
286#else
287	if (instr[0] != FBT_PUSHL_EBP)
288		return (0);
289
290	if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 &&
291	    instr[2] == FBT_MOVL_ESP_EBP1_V0) &&
292	    !(instr[1] == FBT_MOVL_ESP_EBP0_V1 &&
293	    instr[2] == FBT_MOVL_ESP_EBP1_V1))
294		return (0);
295#endif
296
297	fbt = kmem_zalloc(sizeof (*fbt), KM_SLEEP);
298	fbt->fbtp_name = name;
299	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
300	    name, FBT_ENTRY, 3, fbt);
301	fbt->fbtp_patchpoint = instr;
302#ifdef __FreeBSD__
303	fbt->fbtp_ctl = lf;
304	fbt->fbtp_loadcnt = lf->loadcnt;
305#endif
306#ifdef __NetBSD__
307	fbt->fbtp_ctl = mod;
308#endif
309	fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP;
310	fbt->fbtp_savedval = *instr;
311	fbt->fbtp_patchval = FBT_PATCHVAL;
312	fbt->fbtp_symindx = symindx;
313
314	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
315	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
316#ifdef __FreeBSD__
317	lf->fbt_nentries++;
318#endif
319
320	retfbt = NULL;
321again:
322	if (instr >= limit)
323		return (0);
324
325	/*
326	 * If this disassembly fails, then we've likely walked off into
327	 * a jump table or some other unsuitable area.  Bail out of the
328	 * disassembly now.
329	 */
330	if ((size = dtrace_instr_size(instr)) <= 0)
331		return (0);
332
333#ifdef __amd64__
334	/*
335	 * We only instrument "ret" on amd64 -- we don't yet instrument
336	 * ret imm16, largely because the compiler doesn't seem to
337	 * (yet) emit them in the kernel...
338	 */
339	if (*instr != FBT_RET) {
340		instr += size;
341		goto again;
342	}
343#else
344	if (!(size == 1 &&
345	    (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) &&
346	    (*(instr + 1) == FBT_RET ||
347	    *(instr + 1) == FBT_RET_IMM16))) {
348		instr += size;
349		goto again;
350	}
351#endif
352
353	/*
354	 * We (desperately) want to avoid erroneously instrumenting a
355	 * jump table, especially given that our markers are pretty
356	 * short:  two bytes on x86, and just one byte on amd64.  To
357	 * determine if we're looking at a true instruction sequence
358	 * or an inline jump table that happens to contain the same
359	 * byte sequences, we resort to some heuristic sleeze:  we
360	 * treat this instruction as being contained within a pointer,
361	 * and see if that pointer points to within the body of the
362	 * function.  If it does, we refuse to instrument it.
363	 */
364	for (j = 0; j < sizeof (uintptr_t); j++) {
365		caddr_t check = (caddr_t) instr - j;
366		uint8_t *ptr;
367
368		if (check < (caddr_t)value)
369			break;
370
371		if (check + sizeof (caddr_t) > (caddr_t)limit)
372			continue;
373
374		ptr = *(uint8_t **)check;
375
376		if (ptr >= (uint8_t *) value && ptr < limit) {
377			instr += size;
378			goto again;
379		}
380	}
381
382	/*
383	 * We have a winner!
384	 */
385	fbt = kmem_zalloc(sizeof (*fbt), KM_SLEEP);
386	fbt->fbtp_name = name;
387
388	if (retfbt == NULL) {
389		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
390		    name, FBT_RETURN, 3, fbt);
391	} else {
392		retfbt->fbtp_next = fbt;
393		fbt->fbtp_id = retfbt->fbtp_id;
394	}
395
396	retfbt = fbt;
397	fbt->fbtp_patchpoint = instr;
398#ifdef __FreeBSD__
399	fbt->fbtp_ctl = lf;
400	fbt->fbtp_loadcnt = lf->loadcnt;
401#endif
402#ifdef __NetBSD__
403	fbt->fbtp_ctl = mod;
404#endif
405	fbt->fbtp_symindx = symindx;
406
407#ifndef __amd64__
408	if (*instr == FBT_POPL_EBP) {
409		fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
410	} else {
411		ASSERT(*instr == FBT_LEAVE);
412		fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
413	}
414	fbt->fbtp_roffset =
415	    (uintptr_t)(instr - (uint8_t *) value) + 1;
416
417#else
418	ASSERT(*instr == FBT_RET);
419	fbt->fbtp_rval = DTRACE_INVOP_RET;
420	fbt->fbtp_roffset =
421		(uintptr_t)(instr - (uint8_t *) value);
422#endif
423
424	fbt->fbtp_savedval = *instr;
425	fbt->fbtp_patchval = FBT_PATCHVAL;
426	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
427	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
428
429#ifdef __FreeBSD__
430	lf->fbt_nentries++;
431#endif
432
433	instr += size;
434	goto again;
435}
436