/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * $FreeBSD$
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/stack.h>
#include <sys/pcpu.h>

#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/stack.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include "regset.h"

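/*
 * kernbase is a linker-provided symbol whose address marks the lowest
 * kernel virtual address; the copy and fuword routines below use
 * kernelbase to reject user-supplied addresses that fall in the kernel.
 */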
extern uintptr_t kernbase;
uintptr_t kernelbase = (uintptr_t) &kernbase;

uint8_t dtrace_fuword8_nocheck(void *);
uint16_t dtrace_fuword16_nocheck(void *);
uint32_t dtrace_fuword32_nocheck(void *);
uint64_t dtrace_fuword64_nocheck(void *);

int	dtrace_ustackdepth_max = 2048;

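/*
 * Record up to pcstack_limit kernel return addresses, starting from the
 * current frame pointer.  The first 'aframes' frames belong to the
 * DTrace machinery and are skipped; if the probe recorded its caller in
 * cpu_dtrace_caller, that pc is reported in place of the last skipped
 * frame.  Any remaining slots are zeroed.
 */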
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
    uint32_t *intrpc)
{
	int depth = 0;
	register_t ebp;
	struct i386_frame *frame;
	vm_offset_t callpc;
	pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller;

	if (intrpc != 0)
		pcstack[depth++] = (pc_t) intrpc;

	aframes++;

	__asm __volatile("movl %%ebp,%0" : "=r" (ebp));

	frame = (struct i386_frame *)ebp;
	while (depth < pcstack_limit) {
		if (!INKERNEL(frame))
			break;

		callpc = frame->f_retaddr;

		if (!INKERNEL(callpc))
			break;

		if (aframes > 0) {
			aframes--;
			if ((aframes == 0) && (caller != 0)) {
				pcstack[depth++] = caller;
			}
		} else {
			pcstack[depth++] = callpc;
		}

		if (frame->f_frame <= frame ||
		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
		    curthread->td_kstack_pages * PAGE_SIZE)
			break;
		frame = frame->f_frame;
	}

	for (; depth < pcstack_limit; depth++) {
		pcstack[depth] = 0;
	}
}

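/*
 * Common user-mode stack walker.  Starting from the given pc and frame
 * address, follow the chain of saved frame pointers, storing each pc
 * into pcstack (when it is non-NULL) and returning the number of frames
 * visited.  The walk is bounded by dtrace_ustackdepth_max so that a
 * circular frame chain cannot spin forever in probe context.
 */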
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
    uintptr_t sp)
{
#ifdef notyet
	proc_t *p = curproc;
	uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack. */
	size_t s1, s2;
#endif
	uintptr_t oldsp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
	int ret = 0;

	ASSERT(pcstack == NULL || pcstack_limit > 0);
	ASSERT(dtrace_ustackdepth_max > 0);

#ifdef notyet /* XXX signal stack. */
	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	while (pc != 0) {
		/*
		 * We limit the number of times we can go around this
		 * loop to account for a circular stack.
		 */
		if (ret++ >= dtrace_ustackdepth_max) {
			*flags |= CPU_DTRACE_BADSTACK;
			cpu_core[curcpu].cpuc_dtrace_illval = sp;
			break;
		}

		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		if (sp == 0)
			break;

		oldsp = sp;

#ifdef notyet /* XXX signal stack. */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
				greg32_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		} else {
			if (p->p_model == DATAMODEL_NATIVE) {
				struct frame *fr = (struct frame *)sp;

				pc = dtrace_fulword(&fr->fr_savpc);
				sp = dtrace_fulword(&fr->fr_savfp);
			} else {
				struct frame32 *fr = (struct frame32 *)sp;

				pc = dtrace_fuword32(&fr->fr_savpc);
				sp = dtrace_fuword32(&fr->fr_savfp);
			}
		}
#else
		pc = dtrace_fuword32((void *)(sp +
		    offsetof(struct i386_frame, f_retaddr)));
		sp = dtrace_fuword32((void *)sp);
#endif /* ! notyet */

		if (sp == oldsp) {
			*flags |= CPU_DTRACE_BADSTACK;
			cpu_core[curcpu].cpuc_dtrace_illval = sp;
			break;
		}

		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

	return (ret);
}

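/*
 * Capture the current thread's user stack into pcstack.  The first slot
 * holds the pid; the rest are filled by dtrace_getustack_common() and
 * any slots left over are zeroed.  For entry probes the callee's
 * prologue has not yet pushed the frame pointer, so the caller's pc is
 * read from the top of the user stack, where the call instruction left
 * it.
 */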
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
	int n;

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (p == NULL || (tf = curthread->td_frame) == NULL)
		goto zero;

	*pcstack++ = (uint64_t)p->p_pid;
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = tf->tf_eip;
	fp = tf->tf_ebp;
	sp = tf->tf_esp;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * In an entry probe.  The frame pointer has not yet been
		 * pushed (that happens in the function prologue).  The
		 * best approach is to add the current pc as a missing top
		 * of stack and back the pc up to the caller, which is stored
		 * at the current stack pointer address since the call
		 * instruction puts it there right before the branch.
		 */

		*pcstack++ = (uint64_t)pc;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		pc = dtrace_fuword32((void *) sp);
	}

	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp);
	ASSERT(n >= 0);
	ASSERT(n <= pcstack_limit);

	pcstack += n;
	pcstack_limit -= n;

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}

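/*
 * Return the depth of the current thread's user stack, or -1 if a fault
 * is already pending in probe context.  This performs the same walk as
 * dtrace_getupcstack() but only counts frames.
 */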
int
dtrace_getustackdepth(void)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, fp, sp;
	int n = 0;

	if (p == NULL || (tf = curthread->td_frame) == NULL)
		return (0);

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	pc = tf->tf_eip;
	fp = tf->tf_ebp;
	sp = tf->tf_esp;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * In an entry probe.  The frame pointer has not yet been
		 * pushed (that happens in the function prologue).  The
		 * best approach is to add the current pc as a missing top
		 * of stack and back the pc up to the caller, which is stored
		 * at the current stack pointer address since the call
		 * instruction puts it there right before the branch.
		 */

		pc = dtrace_fuword32((void *) sp);
		n++;
	}

	n += dtrace_getustack_common(NULL, 0, pc, fp);

	return (n);
}

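/*
 * Like dtrace_getupcstack(), but also records each frame pointer in
 * fpstack, which is therefore assumed to hold at least as many entries
 * as pcstack: only pcstack_limit bounds the loop.
 */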
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
#ifdef notyet /* XXX signal stack */
	uintptr_t oldcontext;
	size_t s1, s2;
#endif

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (p == NULL || (tf = curthread->td_frame) == NULL)
		goto zero;

	*pcstack++ = (uint64_t)p->p_pid;
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = tf->tf_eip;
	fp = tf->tf_ebp;
	sp = tf->tf_esp;

#ifdef notyet /* XXX signal stack */
	oldcontext = lwp->lwp_oldcontext;

	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = 0;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		pc = dtrace_fuword32((void *)sp);
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = fp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		if (fp == 0)
			break;

#ifdef notyet /* XXX signal stack */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
				greg32_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		} else
#endif /* XXX */
		{
			pc = dtrace_fuword32((void *)(fp +
			    offsetof(struct i386_frame, f_retaddr)));
			fp = dtrace_fuword32((void *)fp);
		}

		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}

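/*
 * Fetch argument 'arg' of the function 'aframes' frames up the stack.
 * If the walk passes through dtrace_invop() (as it does for providers
 * such as fbt, which trap via an invalid opcode), the arguments live in
 * the interrupted frame and are found through the trapframe pointer
 * that the handler pushed.  Otherwise the provider called
 * dtrace_probe() directly, and the desired argument sits one slot
 * further out, past dtrace_probe()'s first argument (the probe ID).
 */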
uint64_t
dtrace_getarg(int arg, int aframes)
{
	struct trapframe *frame;
	struct i386_frame *fp = (struct i386_frame *)dtrace_getfp();
	uintptr_t *stack, val;
	int i;

	for (i = 1; i <= aframes; i++) {
		fp = fp->f_frame;

		if (P2ROUNDUP(fp->f_retaddr, 4) ==
		    (long)dtrace_invop_callsite) {
			/*
			 * If we pass through the invalid op handler, we will
			 * use the trap frame pointer that it pushed on the
			 * stack as the second argument to dtrace_invop() as
			 * the pointer to the stack.  When using this stack, we
			 * must skip the third argument to dtrace_invop(),
			 * which is included in the i386_frame.
			 */
			frame = (struct trapframe *)(((uintptr_t **)&fp[1])[0]);
			/*
			 * Skip the three hardware-saved registers and the
			 * return address.
			 */
			stack = (uintptr_t *)frame->tf_isp + 4;
			goto load;
		}
	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() -- the provider simply called dtrace_probe()
	 * directly.  As this is the case, we need to shift the argument
	 * that we're looking for:  the probe ID is the first argument to
	 * dtrace_probe(), so the argument n will actually be found where
	 * one would expect to find argument (n + 1).
	 */
	arg++;

	stack = (uintptr_t *)fp + 2;

load:
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	val = stack[arg];
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return (val);
}

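/*
 * Return the kernel stack depth below 'aframes' artificial frames,
 * walking saved frame pointers until an address falls outside the
 * kernel or outside the current thread's kstack.
 */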
int
dtrace_getstackdepth(int aframes)
{
	int depth = 0;
	struct i386_frame *frame;
	vm_offset_t ebp;

	aframes++;
	ebp = dtrace_getfp();
	frame = (struct i386_frame *)ebp;
	depth++;
	for (;;) {
		if (!INKERNEL((long) frame))
			break;
		if (!INKERNEL((long) frame->f_frame))
			break;
		depth++;
		if (frame->f_frame <= frame ||
		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
		    curthread->td_kstack_pages * PAGE_SIZE)
			break;
		frame = frame->f_frame;
	}
	if (depth < aframes)
		return (0);
	else
		return (depth - aframes);
}

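/*
 * Map a register index as exposed to D programs onto the corresponding
 * value in the given trapframe.  %gs is not saved in the i386 trapframe
 * and is fetched from the PCB instead.
 */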
ulong_t
dtrace_getreg(struct trapframe *rp, uint_t reg)
{
	struct pcb *pcb;
	int regmap[] = {  /* Order is dependent on reg.d */
		REG_GS,		/* 0  GS */
		REG_FS,		/* 1  FS */
		REG_ES,		/* 2  ES */
		REG_DS,		/* 3  DS */
		REG_RDI,	/* 4  EDI */
		REG_RSI,	/* 5  ESI */
		REG_RBP,	/* 6  EBP, REG_FP */
		REG_RSP,	/* 7  ESP */
		REG_RBX,	/* 8  EBX */
		REG_RDX,	/* 9  EDX, REG_R1 */
		REG_RCX,	/* 10 ECX */
		REG_RAX,	/* 11 EAX, REG_R0 */
		REG_TRAPNO,	/* 12 TRAPNO */
		REG_ERR,	/* 13 ERR */
		REG_RIP,	/* 14 EIP, REG_PC */
		REG_CS,		/* 15 CS */
		REG_RFL,	/* 16 EFL, REG_PS */
		REG_RSP,	/* 17 UESP, REG_SP */
		REG_SS		/* 18 SS */
	};

	if (reg > SS) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	if (reg >= sizeof (regmap) / sizeof (int)) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	reg = regmap[reg];

	switch (reg) {
	case REG_GS:
		if ((pcb = curthread->td_pcb) == NULL) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
		return (pcb->pcb_gs);
	case REG_FS:
		return (rp->tf_fs);
	case REG_ES:
		return (rp->tf_es);
	case REG_DS:
		return (rp->tf_ds);
	case REG_RDI:
		return (rp->tf_edi);
	case REG_RSI:
		return (rp->tf_esi);
	case REG_RBP:
		return (rp->tf_ebp);
	case REG_RSP:
		return (rp->tf_isp);
	case REG_RBX:
		return (rp->tf_ebx);
	case REG_RDX:
		return (rp->tf_edx);
	case REG_RCX:
		return (rp->tf_ecx);
	case REG_RAX:
		return (rp->tf_eax);
	case REG_TRAPNO:
		return (rp->tf_trapno);
	case REG_ERR:
		return (rp->tf_err);
	case REG_RIP:
		return (rp->tf_eip);
	case REG_CS:
		return (rp->tf_cs);
	case REG_RFL:
		return (rp->tf_eflags);
#if 0
	case REG_RSP:
		return (rp->tf_esp);
#endif
	case REG_SS:
		return (rp->tf_ss);
	default:
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}
}

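/*
 * Verify that a userland copy stays entirely below the kernel and that
 * the address range does not wrap; on failure, flag a bad address and
 * record the offending user address.
 */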
static int
dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
{
	ASSERT(kaddr >= kernelbase && kaddr + size >= kaddr);

	if (uaddr + size >= kernelbase || uaddr + size < uaddr) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[curcpu].cpuc_dtrace_illval = uaddr;
		return (0);
	}

	return (1);
}

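/*
 * Probe-context copy routines: each validates the user address range
 * with dtrace_copycheck() before handing off to the unchecked
 * dtrace_copy() and dtrace_copystr() primitives.
 */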
void
dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (dtrace_copycheck(uaddr, kaddr, size))
		dtrace_copy(uaddr, kaddr, size);
}

void
dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (dtrace_copycheck(uaddr, kaddr, size))
		dtrace_copy(kaddr, uaddr, size);
}

void
dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (dtrace_copycheck(uaddr, kaddr, size))
		dtrace_copystr(uaddr, kaddr, size, flags);
}

void
dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (dtrace_copycheck(uaddr, kaddr, size))
		dtrace_copystr(kaddr, uaddr, size, flags);
}

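/*
 * Fetch an 8-, 16-, 32- or 64-bit value from userland, failing with
 * CPU_DTRACE_BADADDR if the address is not below the kernel.  The
 * _nocheck variants declared above perform the actual access.
 */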
uint8_t
dtrace_fuword8(void *uaddr)
{
	if ((uintptr_t)uaddr >= kernelbase) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
		return (0);
	}
	return (dtrace_fuword8_nocheck(uaddr));
}

uint16_t
dtrace_fuword16(void *uaddr)
{
	if ((uintptr_t)uaddr >= kernelbase) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
		return (0);
	}
	return (dtrace_fuword16_nocheck(uaddr));
}

uint32_t
dtrace_fuword32(void *uaddr)
{
	if ((uintptr_t)uaddr >= kernelbase) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
		return (0);
	}
	return (dtrace_fuword32_nocheck(uaddr));
}

uint64_t
dtrace_fuword64(void *uaddr)
{
	if ((uintptr_t)uaddr >= kernelbase) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
		return (0);
	}
	return (dtrace_fuword64_nocheck(uaddr));
}