fasttrap_isa.c revision 327551
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2010 The FreeBSD Foundation
 *
 * $FreeBSD$
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifdef illumos
#pragma ident	"%Z%%M%	%I%	%E% SMI"
#endif

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/cmn_err.h>
#ifdef illumos
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/segments.h>
#include <sys/x86_archext.h>
#else
#include <cddl/dev/dtrace/dtrace_cddl.h>
#include <sys/types.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/dtrace_bsd.h>
#include <cddl/dev/dtrace/x86/regset.h>
#include <machine/segments.h>
#include <machine/reg.h>
#include <machine/pcb.h>
#endif
#include <sys/sysmacros.h>
#ifdef illumos
#include <sys/trap.h>
#include <sys/archsystm.h>
#else
#include <sys/ptrace.h>
#endif /* illumos */

#ifdef __i386__
#define	r_rax	r_eax
#define	r_rbx	r_ebx
#define	r_rip	r_eip
#define	r_rflags r_eflags
#define	r_rsp	r_esp
#define	r_rbp	r_ebp
#endif

/*
 * Lossless User-Land Tracing on x86
 * ---------------------------------
 *
 * The execution of most instructions is not dependent on the address; for
 * these instructions it is sufficient to copy them into the user process's
 * address space and execute them. To effectively single-step an instruction
 * in user-land, we copy out the following sequence of instructions to scratch
 * space in the user thread's ulwp_t structure.
 *
 * We then set the program counter (%eip or %rip) to point to this scratch
 * space. Once execution resumes, the original instruction is executed and
 * then control flow is redirected to what was originally the subsequent
 * instruction. If the kernel attempts to deliver a signal while single-
 * stepping, the signal is deferred and the program counter is moved into the
 * second sequence of instructions. The second sequence ends in a trap into
 * the kernel where the deferred signal is then properly handled and delivered.
 *
 * For instructions whose execution is position dependent, we perform simple
 * emulation. These instructions are limited to control transfer
 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
 * of %rip-relative addressing that means that almost any instruction can be
 * position dependent. For all the details on how we emulate generic
 * instructions, including %rip-relative instructions, see the code in
 * fasttrap_pid_probe() below where we handle instructions of type
 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
 */

#define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x3)
#define	FASTTRAP_MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)
#define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x7)
#define	FASTTRAP_MODRM(mod, reg, rm)	(((mod) << 6) | ((reg) << 3) | (rm))

#define	FASTTRAP_SIB_SCALE(sib)		(((sib) >> 6) & 0x3)
#define	FASTTRAP_SIB_INDEX(sib)		(((sib) >> 3) & 0x7)
#define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x7)

#define	FASTTRAP_REX_W(rex)		(((rex) >> 3) & 1)
#define	FASTTRAP_REX_R(rex)		(((rex) >> 2) & 1)
#define	FASTTRAP_REX_X(rex)		(((rex) >> 1) & 1)
#define	FASTTRAP_REX_B(rex)		((rex) & 1)
#define	FASTTRAP_REX(w, r, x, b)	\
	(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))

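/*
 * A worked decode example (illustrative bytes, not taken from this file):
 * in "ff 64 c8 04" -- jmp *0x4(%rax,%rcx,8) -- the ModRM byte 0x64 yields
 * mod = 1, reg = 4 (the group-5 near-jmp extension), and rm = 4 (a SIB
 * byte follows); the SIB byte 0xc8 yields scale = 3 (x8), index = 1
 * (%rcx), and base = 0 (%rax), and mod = 1 means a one-byte displacement
 * (0x04) completes the instruction.
 */
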
/*
 * Single-byte op-codes.
 */
#define	FASTTRAP_PUSHL_EBP	0x55

#define	FASTTRAP_JO		0x70
#define	FASTTRAP_JNO		0x71
#define	FASTTRAP_JB		0x72
#define	FASTTRAP_JAE		0x73
#define	FASTTRAP_JE		0x74
#define	FASTTRAP_JNE		0x75
#define	FASTTRAP_JBE		0x76
#define	FASTTRAP_JA		0x77
#define	FASTTRAP_JS		0x78
#define	FASTTRAP_JNS		0x79
#define	FASTTRAP_JP		0x7a
#define	FASTTRAP_JNP		0x7b
#define	FASTTRAP_JL		0x7c
#define	FASTTRAP_JGE		0x7d
#define	FASTTRAP_JLE		0x7e
#define	FASTTRAP_JG		0x7f

#define	FASTTRAP_NOP		0x90

#define	FASTTRAP_MOV_EAX	0xb8
#define	FASTTRAP_MOV_ECX	0xb9

#define	FASTTRAP_RET16		0xc2
#define	FASTTRAP_RET		0xc3

#define	FASTTRAP_LOOPNZ		0xe0
#define	FASTTRAP_LOOPZ		0xe1
#define	FASTTRAP_LOOP		0xe2
#define	FASTTRAP_JCXZ		0xe3

#define	FASTTRAP_CALL		0xe8
#define	FASTTRAP_JMP32		0xe9
#define	FASTTRAP_JMP8		0xeb

#define	FASTTRAP_INT3		0xcc
#define	FASTTRAP_INT		0xcd

#define	FASTTRAP_2_BYTE_OP	0x0f
#define	FASTTRAP_GROUP5_OP	0xff

/*
 * Two-byte op-codes (second byte only).
 */
#define	FASTTRAP_0F_JO		0x80
#define	FASTTRAP_0F_JNO		0x81
#define	FASTTRAP_0F_JB		0x82
#define	FASTTRAP_0F_JAE		0x83
#define	FASTTRAP_0F_JE		0x84
#define	FASTTRAP_0F_JNE		0x85
#define	FASTTRAP_0F_JBE		0x86
#define	FASTTRAP_0F_JA		0x87
#define	FASTTRAP_0F_JS		0x88
#define	FASTTRAP_0F_JNS		0x89
#define	FASTTRAP_0F_JP		0x8a
#define	FASTTRAP_0F_JNP		0x8b
#define	FASTTRAP_0F_JL		0x8c
#define	FASTTRAP_0F_JGE		0x8d
#define	FASTTRAP_0F_JLE		0x8e
#define	FASTTRAP_0F_JG		0x8f

#define	FASTTRAP_EFLAGS_OF	0x800
#define	FASTTRAP_EFLAGS_DF	0x400
#define	FASTTRAP_EFLAGS_SF	0x080
#define	FASTTRAP_EFLAGS_ZF	0x040
#define	FASTTRAP_EFLAGS_AF	0x010
#define	FASTTRAP_EFLAGS_PF	0x004
#define	FASTTRAP_EFLAGS_CF	0x001

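/*
 * These masks are the architectural %eflags bit positions: CF is bit 0,
 * PF bit 2, AF bit 4, ZF bit 6, SF bit 7, DF bit 10, and OF bit 11.
 */
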
/*
 * Instruction prefixes.
 */
#define	FASTTRAP_PREFIX_OPERAND	0x66
#define	FASTTRAP_PREFIX_ADDRESS	0x67
#define	FASTTRAP_PREFIX_CS	0x2E
#define	FASTTRAP_PREFIX_DS	0x3E
#define	FASTTRAP_PREFIX_ES	0x26
#define	FASTTRAP_PREFIX_FS	0x64
#define	FASTTRAP_PREFIX_GS	0x65
#define	FASTTRAP_PREFIX_SS	0x36
#define	FASTTRAP_PREFIX_LOCK	0xF0
#define	FASTTRAP_PREFIX_REP	0xF3
#define	FASTTRAP_PREFIX_REPNE	0xF2

#define	FASTTRAP_NOREG	0xff

/*
 * Map between instruction register encodings and the kernel constants which
 * correspond to indices into struct regs.
 */
#ifdef __amd64
static const uint8_t regmap[16] = {
	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
};
#else
static const uint8_t regmap[8] = {
	EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI
};
#endif

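/*
 * On amd64, a set REX.B (or REX.X) bit extends a 3-bit register encoding
 * to 4 bits, which is why the 64-bit table has 16 entries: for example,
 * rm = 0 with REX.B set indexes regmap[8] and selects REG_R8 rather than
 * REG_RAX. The 32-bit table only ever sees the eight legacy registers.
 */
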
static ulong_t fasttrap_getreg(struct reg *, uint_t);

static uint64_t
fasttrap_anarg(struct reg *rp, int function_entry, int argno)
{
	uint64_t value = 0;
	int shift = function_entry ? 1 : 0;

#ifdef __amd64
	if (curproc->p_model == DATAMODEL_LP64) {
		uintptr_t *stack;

		/*
		 * In 64-bit mode, the first six arguments are stored in
		 * registers.
		 */
		if (argno < 6)
			switch (argno) {
			case 0:
				return (rp->r_rdi);
			case 1:
				return (rp->r_rsi);
			case 2:
				return (rp->r_rdx);
			case 3:
				return (rp->r_rcx);
			case 4:
				return (rp->r_r8);
			case 5:
				return (rp->r_r9);
			}

		stack = (uintptr_t *)rp->r_rsp;
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&stack[argno - 6 + shift]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
	} else {
#endif
#ifdef __i386
		uint32_t *stack = (uint32_t *)rp->r_esp;
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&stack[argno + shift]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
#endif
#ifdef __amd64
	}
#endif

	return (value);
}

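/*
 * For example, on an LP64 process the seventh argument (argno == 6) of an
 * entry probe is read from stack[1]: slot 0 holds the return address that
 * the call instruction pushed, which the function_entry shift skips.
 */
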
/*ARGSUSED*/
int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
	size_t len = FASTTRAP_MAX_INSTR_SIZE;
	size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));
	uint_t start = 0;
	int rmindex, size;
	uint8_t seg, rex = 0;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set. Since instructions can span
	 * pages, we potentially read the instruction in two parts. If the
	 * second part fails, we just zero out that part of the instruction.
	 */
	if (uread(p, &instr[0], first, pc) != 0)
		return (-1);
	if (len > first &&
	    uread(p, &instr[first], len - first, pc + first) != 0) {
		bzero(&instr[first], len - first);
		len = first;
	}

	/*
	 * If the disassembly fails, then we have a malformed instruction.
	 */
	if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0)
		return (-1);

	/*
	 * Make sure the disassembler isn't completely broken.
	 */
	ASSERT(-1 <= rmindex && rmindex < size);

	/*
	 * If the computed size is greater than the number of bytes read,
	 * then it was a malformed instruction possibly because it fell on a
	 * page boundary and the subsequent page was missing or because of
	 * some malicious user.
	 */
	if (size > len)
		return (-1);

	tp->ftt_size = (uint8_t)size;
	tp->ftt_segment = FASTTRAP_SEG_NONE;

	/*
	 * Find the start of the instruction's opcode by processing any
	 * legacy prefixes.
	 */
	for (;;) {
		seg = 0;
		switch (instr[start]) {
		case FASTTRAP_PREFIX_SS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_GS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_FS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_ES:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_DS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_CS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_OPERAND:
		case FASTTRAP_PREFIX_ADDRESS:
		case FASTTRAP_PREFIX_LOCK:
		case FASTTRAP_PREFIX_REP:
		case FASTTRAP_PREFIX_REPNE:
			if (seg != 0) {
				/*
				 * It's illegal for an instruction to specify
				 * two segment prefixes -- give up on this
				 * illegal instruction.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE)
					return (-1);

				tp->ftt_segment = seg;
			}
			start++;
			continue;
		}
		break;
	}
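
	/*
	 * Note that the fall-through cascade above counts case labels down
	 * to FASTTRAP_PREFIX_CS, leaving seg equal to the FASTTRAP_SEG_*
	 * value that ftt_segment expects: a %cs override yields 1, counting
	 * up through %ss at 6.
	 */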

#ifdef __amd64
	/*
	 * Identify the REX prefix on 64-bit processes.
	 */
	if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
		rex = instr[start++];
#endif

	/*
	 * Now that we're pretty sure that the instruction is okay, copy the
	 * valid part to the tracepoint.
	 */
	bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);

	tp->ftt_type = FASTTRAP_T_COMMON;
	if (instr[start] == FASTTRAP_2_BYTE_OP) {
		switch (instr[start + 1]) {
		case FASTTRAP_0F_JO:
		case FASTTRAP_0F_JNO:
		case FASTTRAP_0F_JB:
		case FASTTRAP_0F_JAE:
		case FASTTRAP_0F_JE:
		case FASTTRAP_0F_JNE:
		case FASTTRAP_0F_JBE:
		case FASTTRAP_0F_JA:
		case FASTTRAP_0F_JS:
		case FASTTRAP_0F_JNS:
		case FASTTRAP_0F_JP:
		case FASTTRAP_0F_JNP:
		case FASTTRAP_0F_JL:
		case FASTTRAP_0F_JGE:
		case FASTTRAP_0F_JLE:
		case FASTTRAP_0F_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 2];
			break;
		}
	} else if (instr[start] == FASTTRAP_GROUP5_OP) {
		uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
		uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
		uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);

		if (reg == 2 || reg == 4) {
			uint_t i, sz;

			if (reg == 2)
				tp->ftt_type = FASTTRAP_T_CALL;
			else
				tp->ftt_type = FASTTRAP_T_JMP;

			if (mod == 3)
				tp->ftt_code = 2;
			else
				tp->ftt_code = 1;

			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * See AMD x86-64 Architecture Programmer's Manual
			 * Volume 3, Section 1.2.7, Table 1-12, and
			 * Appendix A.3.1, Table A-15.
			 */
			if (mod != 3 && rm == 4) {
				uint8_t sib = instr[start + 2];
				uint_t index = FASTTRAP_SIB_INDEX(sib);
				uint_t base = FASTTRAP_SIB_BASE(sib);

				tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);

				tp->ftt_index = (index == 4) ?
				    FASTTRAP_NOREG :
				    regmap[index | (FASTTRAP_REX_X(rex) << 3)];
				tp->ftt_base = (mod == 0 && base == 5) ?
				    FASTTRAP_NOREG :
				    regmap[base | (FASTTRAP_REX_B(rex) << 3)];

				i = 3;
				sz = mod == 1 ? 1 : 4;
			} else {
				/*
				 * In 64-bit mode, mod == 0 and r/m == 5
				 * denotes %rip-relative addressing; in 32-bit
				 * mode, the base register isn't used. In both
				 * modes, there is a 32-bit operand.
				 */
				if (mod == 0 && rm == 5) {
#ifdef __amd64
					if (p->p_model == DATAMODEL_LP64)
						tp->ftt_base = REG_RIP;
					else
#endif
						tp->ftt_base = FASTTRAP_NOREG;
					sz = 4;
				} else  {
					uint8_t base = rm |
					    (FASTTRAP_REX_B(rex) << 3);

					tp->ftt_base = regmap[base];
					sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
				}
				tp->ftt_index = FASTTRAP_NOREG;
				i = 2;
			}

			if (sz == 1) {
				tp->ftt_dest = *(int8_t *)&instr[start + i];
			} else if (sz == 4) {
				/* LINTED - alignment */
				tp->ftt_dest = *(int32_t *)&instr[start + i];
			} else {
				tp->ftt_dest = 0;
			}
		}
	} else {
		switch (instr[start]) {
		case FASTTRAP_RET:
			tp->ftt_type = FASTTRAP_T_RET;
			break;

		case FASTTRAP_RET16:
			tp->ftt_type = FASTTRAP_T_RET16;
			/* LINTED - alignment */
			tp->ftt_dest = *(uint16_t *)&instr[start + 1];
			break;

		case FASTTRAP_JO:
		case FASTTRAP_JNO:
		case FASTTRAP_JB:
		case FASTTRAP_JAE:
		case FASTTRAP_JE:
		case FASTTRAP_JNE:
		case FASTTRAP_JBE:
		case FASTTRAP_JA:
		case FASTTRAP_JS:
		case FASTTRAP_JNS:
		case FASTTRAP_JP:
		case FASTTRAP_JNP:
		case FASTTRAP_JL:
		case FASTTRAP_JGE:
		case FASTTRAP_JLE:
		case FASTTRAP_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_LOOPNZ:
		case FASTTRAP_LOOPZ:
		case FASTTRAP_LOOP:
			tp->ftt_type = FASTTRAP_T_LOOP;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_JCXZ:
			tp->ftt_type = FASTTRAP_T_JCXZ;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_CALL:
			tp->ftt_type = FASTTRAP_T_CALL;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 1];
			tp->ftt_code = 0;
			break;

		case FASTTRAP_JMP32:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 1];
			break;
		case FASTTRAP_JMP8:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_PUSHL_EBP:
			if (start == 0)
				tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
			break;

		case FASTTRAP_NOP:
#ifdef __amd64
			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * On amd64 we have to be careful not to confuse a nop
			 * (actually xchgl %eax, %eax) with an instruction using
			 * the same opcode, but that does something different
			 * (e.g. xchgl %r8d, %eax or xchgq %r8, %rax).
			 */
			if (FASTTRAP_REX_B(rex) == 0)
#endif
				tp->ftt_type = FASTTRAP_T_NOP;
			break;

		case FASTTRAP_INT3:
			/*
			 * The pid provider shares the int3 trap with debugger
			 * breakpoints so we can't instrument them.
			 */
			ASSERT(instr[start] == FASTTRAP_INSTR);
			return (-1);

		case FASTTRAP_INT:
			/*
			 * Interrupts seem like they could be traced with
			 * no negative implications, but it's possible that
			 * a thread could be redirected by the trap handling
			 * code which would eventually return to the
			 * instruction after the interrupt. If the interrupt
			 * were in our scratch space, the subsequent
			 * instruction might be overwritten before we return.
			 * Accordingly we refuse to instrument any interrupt.
			 */
			return (-1);
		}
	}

#ifdef __amd64
	if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
		/*
		 * If the process is 64-bit and the instruction type is still
		 * FASTTRAP_T_COMMON -- meaning we're going to copy it out and
		 * execute it -- we need to watch for %rip-relative
		 * addressing mode. See the portion of fasttrap_pid_probe()
		 * below where we handle tracepoints with type
		 * FASTTRAP_T_COMMON for how we emulate instructions that
		 * employ %rip-relative addressing.
		 */
		if (rmindex != -1) {
			uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
			uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
			uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);

			ASSERT(rmindex > start);

			if (mod == 0 && rm == 5) {
				/*
				 * We need to be sure to avoid other
				 * registers used by this instruction. While
				 * the reg field may determine the op code
				 * rather than denoting a register, assuming
				 * that it denotes a register is always safe.
				 * We leave the REX field intact and use
				 * whatever value's there for simplicity.
				 */
				if (reg != 0) {
					tp->ftt_ripmode = FASTTRAP_RIP_1 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 0;
				} else {
					tp->ftt_ripmode = FASTTRAP_RIP_2 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 1;
				}

				tp->ftt_modrm = tp->ftt_instr[rmindex];
				tp->ftt_instr[rmindex] =
				    FASTTRAP_MODRM(2, reg, rm);
			}
		}
	}
#endif

	return (0);
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr = FASTTRAP_INSTR;

	if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	uint8_t instr;

	/*
	 * Distinguish between read or write failures and a changed
	 * instruction.
	 */
	if (uread(p, &instr, 1, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

#ifdef __amd64
static uintptr_t
fasttrap_fulword_noerr(const void *uaddr)
{
	uintptr_t ret;

	if ((ret = fasttrap_fulword(uaddr)) != -1)
		return (ret);

	return (0);
}
#endif

#ifdef __i386__
static uint32_t
fasttrap_fuword32_noerr(const void *uaddr)
{
	uint32_t ret;

	if ((ret = fasttrap_fuword32(uaddr)) != -1)
		return (ret);

	return (0);
}
#endif

static void
fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid,
    uintptr_t new_pc)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
#ifdef illumos
	kmutex_t *pid_mtx;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
#else
	struct rm_priotracker tracker;

	rm_rlock(&fasttrap_tp_lock, &tracker);
#endif
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL) {
#ifdef illumos
		mutex_exit(pid_mtx);
#else
		rm_runlock(&fasttrap_tp_lock, &tracker);
#endif
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		/*
		 * If there's a branch that could act as a return site, we
		 * need to trace it, and check here if the program counter is
		 * external to the function.
		 */
		if (tp->ftt_type != FASTTRAP_T_RET &&
		    tp->ftt_type != FASTTRAP_T_RET16 &&
		    new_pc - id->fti_probe->ftp_faddr <
		    id->fti_probe->ftp_fsize)
			continue;

		dtrace_probe(id->fti_probe->ftp_id,
		    pc - id->fti_probe->ftp_faddr,
		    rp->r_rax, rp->r_rbx, 0, 0);
	}

#ifdef illumos
	mutex_exit(pid_mtx);
#else
	rm_runlock(&fasttrap_tp_lock, &tracker);
#endif
}

static void
fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
{
#ifdef illumos
	sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	sqp->sq_info.si_signo = SIGSEGV;
	sqp->sq_info.si_code = SEGV_MAPERR;
	sqp->sq_info.si_addr = (caddr_t)addr;

	mutex_enter(&p->p_lock);
	sigaddqa(p, t, sqp);
	mutex_exit(&p->p_lock);

	if (t != NULL)
		aston(t);
#else
	ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP);

	ksiginfo_init(ksi);
	ksi->ksi_signo = SIGSEGV;
	ksi->ksi_code = SEGV_MAPERR;
	ksi->ksi_addr = (caddr_t)addr;
	(void) tdksignal(t, SIGSEGV, ksi);
#endif
}

#ifdef __amd64
static void
fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc,
    uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	uintptr_t *stack = (uintptr_t *)rp->r_rsp;

	for (i = 0; i < cap; i++) {
		x = probe->ftp_argmap[i];

		if (x < 6)
			argv[i] = (&rp->r_rdi)[x];
		else
			argv[i] = fasttrap_fulword_noerr(&stack[x]);
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}
#endif

#ifdef __i386__
static void
fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc,
    uint32_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	uint32_t *stack = (uint32_t *)rp->r_rsp;

	for (i = 0; i < cap; i++) {
		x = probe->ftp_argmap[i];

		argv[i] = fasttrap_fuword32_noerr(&stack[x]);
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}
#endif

static int
fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr)
{
	proc_t *p = curproc;
#ifdef __i386__
	struct segment_descriptor *desc;
#else
	struct user_segment_descriptor *desc;
#endif
	uint16_t sel = 0, ndx, type;
	uintptr_t limit;

	switch (tp->ftt_segment) {
	case FASTTRAP_SEG_CS:
		sel = rp->r_cs;
		break;
	case FASTTRAP_SEG_DS:
		sel = rp->r_ds;
		break;
	case FASTTRAP_SEG_ES:
		sel = rp->r_es;
		break;
	case FASTTRAP_SEG_FS:
		sel = rp->r_fs;
		break;
	case FASTTRAP_SEG_GS:
		sel = rp->r_gs;
		break;
	case FASTTRAP_SEG_SS:
		sel = rp->r_ss;
		break;
	}

	/*
	 * Make sure the given segment register specifies a user privilege
	 * selector rather than a kernel selector.
	 */
	if (ISPL(sel) != SEL_UPL)
		return (-1);

	ndx = IDXSEL(sel);

	/*
	 * Check the bounds and grab the descriptor out of the specified
	 * descriptor table.
	 */
	if (ISLDT(sel)) {
#ifdef __i386__
		if (ndx > p->p_md.md_ldt->ldt_len)
			return (-1);

		desc = (struct segment_descriptor *)
		    p->p_md.md_ldt[ndx].ldt_base;
#else
		if (ndx > max_ldt_segment)
			return (-1);

		desc = (struct user_segment_descriptor *)
		    p->p_md.md_ldt[ndx].ldt_base;
#endif

	} else {
		if (ndx >= NGDT)
			return (-1);

#ifdef __i386__
		desc = &gdt[ndx].sd;
#else
		desc = &gdt[ndx];
#endif
	}

	/*
	 * The descriptor must have user privilege level and it must be
	 * present in memory.
	 */
	if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1)
		return (-1);

	type = desc->sd_type;

	/*
	 * If the S bit in the type field is not set, this descriptor can
	 * only be used in system context.
	 */
	if ((type & 0x10) != 0x10)
		return (-1);

	limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1);

	if (tp->ftt_segment == FASTTRAP_SEG_CS) {
		/*
		 * The code/data bit and readable bit must both be set.
		 */
		if ((type & 0xa) != 0xa)
			return (-1);

		if (*addr > limit)
			return (-1);
	} else {
		/*
		 * The code/data bit must be clear.
		 */
		if ((type & 0x8) != 0)
			return (-1);

		/*
		 * If the expand-down bit is clear, we just check the limit as
		 * it would naturally be applied. Otherwise, we need to check
		 * that the address is in the range [limit + 1 .. 0xffff] or
		 * [limit + 1 .. 0xffffffff], depending on whether the default
		 * operand size bit is set.
		 */
		if ((type & 0x4) == 0) {
			if (*addr > limit)
				return (-1);
		} else if (desc->sd_def32) {
			if (*addr < limit + 1 || 0xffff < *addr)
				return (-1);
		} else {
			if (*addr < limit + 1 || 0xffffffff < *addr)
				return (-1);
		}
	}

	*addr += USD_GETBASE(desc);

	return (0);
}

int
fasttrap_pid_probe(struct trapframe *tf)
{
	struct reg reg, *rp;
	proc_t *p = curproc, *pp;
	struct rm_priotracker tracker;
	uintptr_t pc;
	uintptr_t new_pc = 0;
	fasttrap_bucket_t *bucket;
#ifdef illumos
	kmutex_t *pid_mtx;
#endif
	fasttrap_tracepoint_t *tp, tp_local;
	pid_t pid;
	dtrace_icookie_t cookie;
	uint_t is_enabled = 0;

	fill_frame_regs(tf, &reg);
	rp = &reg;

	pc = rp->r_rip - 1;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;
#ifdef __amd64
	curthread->t_dtrace_regv = 0;
#endif

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
#ifdef illumos
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
#else
	pp = p;
	sx_slock(&proctree_lock);
	while (pp->p_vmspace == pp->p_pptr->p_vmspace)
		pp = pp->p_pptr;
	pid = pp->p_pid;
	sx_sunlock(&proctree_lock);
	pp = NULL;

	rm_rlock(&fasttrap_tp_lock, &tracker);
#endif

	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
#ifdef illumos
		mutex_exit(pid_mtx);
#else
		rm_runlock(&fasttrap_tp_lock, &tracker);
#endif
		return (-1);
	}

	/*
	 * Set the program counter to the address of the traced instruction
	 * so that it looks right in ustack() output.
	 */
	rp->r_rip = pc;

	if (tp->ftt_ids != NULL) {
		fasttrap_id_t *id;

#ifdef __amd64
		if (p->p_model == DATAMODEL_LP64) {
			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
				fasttrap_probe_t *probe = id->fti_probe;

				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id, rp->r_rdi,
					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
					    rp->r_r8);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id, rp->r_rdi,
					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
					    rp->r_r8);
				} else {
					uintptr_t t[5];

					fasttrap_usdt_args64(probe, rp,
					    sizeof (t) / sizeof (t[0]), t);

					dtrace_probe(probe->ftp_id, t[0], t[1],
					    t[2], t[3], t[4]);
				}
			}
		} else {
#else /* __amd64 */
			uintptr_t s0, s1, s2, s3, s4, s5;
			uint32_t *stack = (uint32_t *)rp->r_esp;

			/*
			 * In 32-bit mode, all arguments are passed on the
			 * stack. If this is a function entry probe, we need
			 * to skip the first entry on the stack as it
			 * represents the return address rather than a
			 * parameter to the function.
			 */
			s0 = fasttrap_fuword32_noerr(&stack[0]);
			s1 = fasttrap_fuword32_noerr(&stack[1]);
			s2 = fasttrap_fuword32_noerr(&stack[2]);
			s3 = fasttrap_fuword32_noerr(&stack[3]);
			s4 = fasttrap_fuword32_noerr(&stack[4]);
			s5 = fasttrap_fuword32_noerr(&stack[5]);

			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
				fasttrap_probe_t *probe = id->fti_probe;

				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id, s1, s2,
					    s3, s4, s5);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id, s0, s1,
					    s2, s3, s4);
				} else {
					uint32_t t[5];

					fasttrap_usdt_args32(probe, rp,
					    sizeof (t) / sizeof (t[0]), t);

					dtrace_probe(probe->ftp_id, t[0], t[1],
					    t[2], t[3], t[4]);
				}
			}
#endif /* __amd64 */
#ifdef __amd64
		}
#endif
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
#ifdef illumos
	mutex_exit(pid_mtx);
#else
	rm_runlock(&fasttrap_tp_lock, &tracker);
#endif
	tp = &tp_local;

	/*
	 * Set the program counter to appear as though the traced instruction
	 * had completely executed. This ensures that fasttrap_getreg() will
	 * report the expected value for REG_RIP.
	 */
	rp->r_rip = pc + tp->ftt_size;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
	 * instruction that was placed there by DTrace when the binary was
	 * linked. As this probe is, in fact, enabled, we need to stuff 1
	 * into %eax or %rax. Accordingly, we can bypass all the instruction
	 * emulation logic since we know the inevitable result. It's possible
	 * that a user could construct a scenario where the 'is-enabled'
	 * probe was on some other instruction, but that would be a rather
	 * exotic way to shoot oneself in the foot.
	 */
	if (is_enabled) {
		rp->r_rax = 1;
		new_pc = rp->r_rip;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position dependent instructions) or optimize
	 * common cases. The rest we have the thread execute back in user-
	 * land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_RET:
	case FASTTRAP_T_RET16:
	{
		uintptr_t dst = 0;
		uintptr_t addr = 0;
		int ret = 0;

		/*
		 * We have to emulate _every_ facet of the behavior of a ret
		 * instruction including what happens if the load from %esp
		 * fails; in that case, we send a SIGSEGV.
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_NATIVE) {
			ret = dst = fasttrap_fulword((void *)rp->r_rsp);
			addr = rp->r_rsp + sizeof (uintptr_t);
		} else {
#endif
#ifdef __i386__
			uint32_t dst32;
			ret = dst32 = fasttrap_fuword32((void *)rp->r_esp);
			dst = dst32;
			addr = rp->r_esp + sizeof (uint32_t);
#endif
#ifdef __amd64
		}
#endif

		if (ret == -1) {
			fasttrap_sigsegv(p, curthread, rp->r_rsp);
			new_pc = pc;
			break;
		}

		if (tp->ftt_type == FASTTRAP_T_RET16)
			addr += tp->ftt_dest;

		rp->r_rsp = addr;
		new_pc = dst;
		break;
	}

	case FASTTRAP_T_JCC:
	{
		uint_t taken = 0;

		switch (tp->ftt_code) {
		case FASTTRAP_JO:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0;
			break;
		case FASTTRAP_JNO:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0;
			break;
		case FASTTRAP_JB:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0;
			break;
		case FASTTRAP_JAE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0;
			break;
		case FASTTRAP_JE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;
			break;
		case FASTTRAP_JNE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;
			break;
		case FASTTRAP_JBE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 ||
			    (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;
			break;
		case FASTTRAP_JA:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 &&
			    (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;
			break;
		case FASTTRAP_JS:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0;
			break;
		case FASTTRAP_JNS:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0;
			break;
		case FASTTRAP_JP:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0;
			break;
		case FASTTRAP_JNP:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0;
			break;
		case FASTTRAP_JL:
			taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JGE:
			taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JLE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 ||
			    ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JG:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
			    ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;

		}

		if (taken)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_LOOP:
	{
		uint_t taken = 0;
#ifdef __amd64
		greg_t cx = --rp->r_rcx;	/* loop tests the decremented count */
#else
		greg_t cx = --rp->r_ecx;
#endif

		switch (tp->ftt_code) {
		case FASTTRAP_LOOPNZ:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
			    cx != 0;
			break;
		case FASTTRAP_LOOPZ:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 &&
			    cx != 0;
			break;
		case FASTTRAP_LOOP:
			taken = (cx != 0);
			break;
		}

		if (taken)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_JCXZ:
	{
#ifdef __amd64
		greg_t cx = rp->r_rcx;
#else
		greg_t cx = rp->r_ecx;
#endif

		if (cx == 0)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_PUSHL_EBP:
	{
		int ret = 0;

#ifdef __amd64
		if (p->p_model == DATAMODEL_NATIVE) {
			rp->r_rsp -= sizeof (uintptr_t);
			ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp);
		} else {
#endif
#ifdef __i386__
			rp->r_rsp -= sizeof (uint32_t);
			ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp);
#endif
#ifdef __amd64
		}
#endif

		if (ret == -1) {
			fasttrap_sigsegv(p, curthread, rp->r_rsp);
			new_pc = pc;
			break;
		}

		new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_NOP:
		new_pc = pc + tp->ftt_size;
		break;

	case FASTTRAP_T_JMP:
	case FASTTRAP_T_CALL:
		if (tp->ftt_code == 0) {
			new_pc = tp->ftt_dest;
		} else {
			uintptr_t value, addr = tp->ftt_dest;

			if (tp->ftt_base != FASTTRAP_NOREG)
				addr += fasttrap_getreg(rp, tp->ftt_base);
			if (tp->ftt_index != FASTTRAP_NOREG)
				addr += fasttrap_getreg(rp, tp->ftt_index) <<
				    tp->ftt_scale;

			if (tp->ftt_code == 1) {
				/*
				 * If there's a segment prefix for this
				 * instruction, we'll need to check permissions
				 * and bounds on the given selector, and adjust
				 * the address accordingly.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
				    fasttrap_do_seg(tp, rp, &addr) != 0) {
					fasttrap_sigsegv(p, curthread, addr);
					new_pc = pc;
					break;
				}

#ifdef __amd64
				if (p->p_model == DATAMODEL_NATIVE) {
#endif
					if ((value = fasttrap_fulword((void *)addr))
					     == -1) {
						fasttrap_sigsegv(p, curthread,
						    addr);
						new_pc = pc;
						break;
					}
					new_pc = value;
#ifdef __amd64
				} else {
					uint32_t value32;
					addr = (uintptr_t)(uint32_t)addr;
					if ((value32 = fasttrap_fuword32((void *)addr))
					    == -1) {
						fasttrap_sigsegv(p, curthread,
						    addr);
						new_pc = pc;
						break;
					}
					new_pc = value32;
				}
#endif
			} else {
				new_pc = addr;
			}
		}

		/*
		 * If this is a call instruction, we need to push the return
		 * address onto the stack. If this fails, we send the process
		 * a SIGSEGV and reset the pc to emulate what would happen if
		 * this instruction weren't traced.
		 */
		if (tp->ftt_type == FASTTRAP_T_CALL) {
			int ret = 0;
			uintptr_t addr = 0, pcps;
#ifdef __amd64
			if (p->p_model == DATAMODEL_NATIVE) {
				addr = rp->r_rsp - sizeof (uintptr_t);
				pcps = pc + tp->ftt_size;
				ret = fasttrap_sulword((void *)addr, pcps);
			} else {
#endif
				addr = rp->r_rsp - sizeof (uint32_t);
				pcps = (uint32_t)(pc + tp->ftt_size);
				ret = fasttrap_suword32((void *)addr, pcps);
#ifdef __amd64
			}
#endif

			if (ret == -1) {
				fasttrap_sigsegv(p, curthread, addr);
				new_pc = pc;
				break;
			}

			rp->r_rsp = addr;
		}

		break;

	case FASTTRAP_T_COMMON:
	{
		uintptr_t addr;
#if defined(__amd64)
		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
#else
		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];
#endif
		uint_t i = 0;
#ifdef illumos
		klwp_t *lwp = ttolwp(curthread);

		/*
		 * Compute the address of the ulwp_t and step over the
		 * ul_self pointer. The method used to store the user-land
		 * thread pointer is very different on 32- and 64-bit
		 * kernels.
		 */
#if defined(__amd64)
		if (p->p_model == DATAMODEL_LP64) {
			addr = lwp->lwp_pcb.pcb_fsbase;
			addr += sizeof (void *);
		} else {
			addr = lwp->lwp_pcb.pcb_gsbase;
			addr += sizeof (caddr32_t);
		}
#else
		addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
		addr += sizeof (void *);
#endif
#else	/* !illumos */
		fasttrap_scrspace_t *scrspace;
		scrspace = fasttrap_scraddr(curthread, tp->ftt_proc);
		if (scrspace == NULL) {
			/*
			 * We failed to allocate scratch space for this thread.
			 * Try to write the original instruction back out and
			 * reset the pc.
			 */
			if (fasttrap_copyout(tp->ftt_instr, (void *)pc,
			    tp->ftt_size))
				fasttrap_sigtrap(p, curthread, pc);
			new_pc = pc;
			break;
		}
		addr = scrspace->ftss_addr;
#endif /* illumos */

		/*
		 * Generic Instruction Tracing
		 * ---------------------------
		 *
		 * This is the layout of the scratch space in the user-land
		 * thread structure for our generated instructions.
		 *
		 *	32-bit mode			bytes
		 *	------------------------	-----
		 * a:	<original instruction>		<= 15
		 *	jmp	<pc + tp->ftt_size>	    5
		 * b:	<original instruction>		<= 15
		 *	int	T_DTRACE_RET		    2
		 *					-----
		 *					<= 37
		 *
		 *	64-bit mode			bytes
		 *	------------------------	-----
		 * a:	<original instruction>		<= 15
		 *	jmp	0(%rip)			    6
		 *	<pc + tp->ftt_size>		    8
		 * b:	<original instruction>		<= 15
		 * 	int	T_DTRACE_RET		    2
		 * 					-----
		 * 					<= 46
		 *
		 * The %pc is set to a, and curthread->t_dtrace_astpc is set
		 * to b. If we encounter a signal on the way out of the
		 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
		 * so that we execute the original instruction and re-enter
		 * the kernel rather than redirecting to the next instruction.
		 *
		 * If there are return probes (so we know that we're going to
		 * need to reenter the kernel after executing the original
		 * instruction), the scratch space will just contain the
		 * original instruction followed by an interrupt -- the same
		 * data as at b.
		 *
		 * %rip-relative Addressing
		 * ------------------------
		 *
		 * There's a further complication in 64-bit mode due to %rip-
		 * relative addressing. While this is clearly a beneficial
		 * architectural decision for position independent code, it's
		 * hard not to see it as a personal attack against the pid
		 * provider since before there was a relatively small set of
		 * instructions to emulate; with %rip-relative addressing,
		 * almost every instruction can potentially depend on the
		 * address at which it's executed. Rather than emulating
		 * the broad spectrum of instructions that can now be
		 * position dependent, we emulate jumps and others as in
		 * 32-bit mode, and take a different tack for instructions
		 * using %rip-relative addressing.
		 *
		 * For every instruction that uses the ModRM byte, the
		 * in-kernel disassembler reports its location. We use the
		 * ModRM byte to identify that an instruction uses
		 * %rip-relative addressing and to see what other registers
		 * the instruction uses. To emulate those instructions,
		 * we modify the instruction to be %rax-relative rather than
		 * %rip-relative (or %rcx-relative if the instruction uses
		 * %rax; or %r8- or %r9-relative if the REX.B is present so
		 * we don't have to rewrite the REX prefix). We then load
		 * the value that %rip would have been into the scratch
		 * register and generate an instruction to reset the scratch
		 * register back to its original value. The instruction
		 * sequence looks like this:
		 *
		 *	64-bit mode %rip-relative	bytes
		 *	------------------------	-----
		 * a:	<modified instruction>		<= 15
		 *	movq	$<value>, %<scratch>	    6
		 *	jmp	0(%rip)			    6
		 *	<pc + tp->ftt_size>		    8
		 * b:	<modified instruction>  	<= 15
		 * 	int	T_DTRACE_RET		    2
		 * 					-----
		 *					   52
		 *
		 * We set curthread->t_dtrace_regv so that upon receiving
		 * a signal we can reset the value of the scratch register.
		 */
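
		/*
		 * A concrete example (illustrative bytes, not taken from
		 * this file): "leaq 0x10(%rip), %rdx" is encoded
		 * 48 8d 15 10 00 00 00, so its ModRM byte has mod = 0,
		 * reg = 2, rm = 5. Since reg != 0,
		 * fasttrap_tracepoint_init() chose FASTTRAP_RIP_1 and
		 * rewrote the ModRM byte to FASTTRAP_MODRM(2, 2, 0) = 0x90,
		 * turning the copied instruction into
		 * "leaq 0x10(%rax), %rdx" (mod = 2 keeps the same 4-byte
		 * displacement); below we load %rax with pc + tp->ftt_size,
		 * the value %rip would have held.
		 */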

		ASSERT(tp->ftt_size <= FASTTRAP_MAX_INSTR_SIZE);

		curthread->t_dtrace_scrpc = addr;
		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
		i += tp->ftt_size;

#ifdef __amd64
		if (tp->ftt_ripmode != 0) {
			greg_t *reg = NULL;

			ASSERT(p->p_model == DATAMODEL_LP64);
			ASSERT(tp->ftt_ripmode &
			    (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));

			/*
			 * If this was a %rip-relative instruction, we change
			 * it to be either a %rax- or %rcx-relative
			 * instruction (depending on whether those registers
			 * are used as another operand; or %r8- or %r9-
			 * relative depending on the value of REX.B). We then
			 * set that register and generate a movq instruction
			 * to reset the value.
			 */
			if (tp->ftt_ripmode & FASTTRAP_RIP_X)
				scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
			else
				scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);

			if (tp->ftt_ripmode & FASTTRAP_RIP_1)
				scratch[i++] = FASTTRAP_MOV_EAX;
			else
				scratch[i++] = FASTTRAP_MOV_ECX;

			switch (tp->ftt_ripmode) {
			case FASTTRAP_RIP_1:
				reg = &rp->r_rax;
				curthread->t_dtrace_reg = REG_RAX;
				break;
			case FASTTRAP_RIP_2:
				reg = &rp->r_rcx;
				curthread->t_dtrace_reg = REG_RCX;
				break;
			case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
				reg = &rp->r_r8;
				curthread->t_dtrace_reg = REG_R8;
				break;
			case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
				reg = &rp->r_r9;
				curthread->t_dtrace_reg = REG_R9;
				break;
			}

			/* LINTED - alignment */
			*(uint64_t *)&scratch[i] = *reg;
			curthread->t_dtrace_regv = *reg;
			*reg = pc + tp->ftt_size;
			i += sizeof (uint64_t);
		}
#endif

		/*
		 * Generate the branch instruction to what would have
		 * normally been the subsequent instruction. In 32-bit mode,
		 * this is just a relative branch; in 64-bit mode this is a
		 * %rip-relative branch that loads the 64-bit pc value
		 * immediately after the jmp instruction.
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_LP64) {
			scratch[i++] = FASTTRAP_GROUP5_OP;
			scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
			/* LINTED - alignment */
			*(uint32_t *)&scratch[i] = 0;
			i += sizeof (uint32_t);
			/* LINTED - alignment */
			*(uint64_t *)&scratch[i] = pc + tp->ftt_size;
			i += sizeof (uint64_t);
		} else {
#endif
#ifdef __i386__
			/*
			 * Set up the jmp to the next instruction; note that
			 * the size of the traced instruction cancels out.
			 */
			scratch[i++] = FASTTRAP_JMP32;
			/* LINTED - alignment */
			*(uint32_t *)&scratch[i] = pc - addr - 5;
			i += sizeof (uint32_t);
#endif
#ifdef __amd64
		}
#endif

		curthread->t_dtrace_astpc = addr + i;
		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
		i += tp->ftt_size;
		scratch[i++] = FASTTRAP_INT;
		scratch[i++] = T_DTRACE_RET;

		ASSERT(i <= sizeof (scratch));

		if (fasttrap_copyout(scratch, (char *)addr, i)) {
			fasttrap_sigtrap(p, curthread, pc);
			new_pc = pc;
			break;
		}
		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			new_pc = curthread->t_dtrace_astpc;
		} else {
			new_pc = curthread->t_dtrace_scrpc;
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = pc + tp->ftt_size;
		curthread->t_dtrace_on = 1;
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until the user thread returns to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			/*
			 * Set the program counter to the address of the traced
			 * instruction so that it looks right in ustack()
			 * output. We had previously set it to the end of the
			 * instruction to simplify %rip-relative addressing.
			 */
			rp->r_rip = pc;

			fasttrap_return_common(rp, pc, pid, new_pc);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == pc);
			ASSERT(curthread->t_dtrace_scrpc != 0);
			ASSERT(new_pc == curthread->t_dtrace_astpc);
		}
	}

	rp->r_rip = new_pc;

#ifndef illumos
	PROC_LOCK(p);
	proc_write_regs(curthread, rp);
	PROC_UNLOCK(p);
#endif

	return (0);
}

int
fasttrap_return_probe(struct trapframe *tf)
{
	struct reg reg, *rp;
	proc_t *p = curproc;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	fill_frame_regs(tf, &reg);
	rp = &reg;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

#ifdef illumos
	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}
#endif

	/*
	 * We set rp->r_rip to the address of the traced instruction so
	 * that it appears to dtrace_probe() that we're on the original
	 * instruction.
	 */
	rp->r_rip = pc;

	fasttrap_return_common(rp, pc, p->p_pid, npc);

	return (0);
}

/*ARGSUSED*/
uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	struct reg r;

	fill_regs(curthread, &r);

	return (fasttrap_anarg(&r, 1, argno));
}

/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	struct reg r;

	fill_regs(curthread, &r);

	return (fasttrap_anarg(&r, 0, argno));
}

static ulong_t
fasttrap_getreg(struct reg *rp, uint_t reg)
{
#ifdef __amd64
	switch (reg) {
	case REG_R15:		return (rp->r_r15);
	case REG_R14:		return (rp->r_r14);
	case REG_R13:		return (rp->r_r13);
	case REG_R12:		return (rp->r_r12);
	case REG_R11:		return (rp->r_r11);
	case REG_R10:		return (rp->r_r10);
	case REG_R9:		return (rp->r_r9);
	case REG_R8:		return (rp->r_r8);
	case REG_RDI:		return (rp->r_rdi);
	case REG_RSI:		return (rp->r_rsi);
	case REG_RBP:		return (rp->r_rbp);
	case REG_RBX:		return (rp->r_rbx);
	case REG_RDX:		return (rp->r_rdx);
	case REG_RCX:		return (rp->r_rcx);
	case REG_RAX:		return (rp->r_rax);
	case REG_TRAPNO:	return (rp->r_trapno);
	case REG_ERR:		return (rp->r_err);
	case REG_RIP:		return (rp->r_rip);
	case REG_CS:		return (rp->r_cs);
#ifdef illumos
	case REG_RFL:		return (rp->r_rfl);
#endif
	case REG_RSP:		return (rp->r_rsp);
	case REG_SS:		return (rp->r_ss);
	case REG_FS:		return (rp->r_fs);
	case REG_GS:		return (rp->r_gs);
	case REG_DS:		return (rp->r_ds);
	case REG_ES:		return (rp->r_es);
	case REG_FSBASE:	return (rdmsr(MSR_FSBASE));
	case REG_GSBASE:	return (rdmsr(MSR_GSBASE));
	}

	panic("dtrace: illegal register constant");
	/*NOTREACHED*/
#else
#define _NGREG 19
	if (reg >= _NGREG)
		panic("dtrace: illegal register constant");

	return (((greg_t *)&rp->r_gs)[reg]);
#endif
}
1911