1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 *
22 * Portions Copyright 2012,2013 Justin Hibbits <jhibbits@freebsd.org>
23 *
24 * $FreeBSD$
25 */
26/*
27 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28 * Use is subject to license terms.
29 */
30#include <sys/cdefs.h>
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/stack.h>
36#include <sys/sysent.h>
37#include <sys/pcpu.h>
38
39#include <machine/frame.h>
40#include <machine/md_var.h>
41#include <machine/psl.h>
42#include <machine/reg.h>
43#include <machine/stack.h>
44
45#include <vm/vm.h>
46#include <vm/vm_param.h>
47#include <vm/pmap.h>
48
49#include "regset.h"
50
51/* Offset to the LR Save word (ppc32) */
52#define RETURN_OFFSET	4
53/* Offset to LR Save word (ppc64).  CR Save area sits between back chain and LR */
54#define RETURN_OFFSET64	16
55
56#ifdef __powerpc64__
57#define OFFSET 4 /* Account for the TOC reload slot */
58#define	FRAME_OFFSET	48
59#else
60#define OFFSET 0
61#define	FRAME_OFFSET	8
62#endif
63
64#define INKERNEL(x)	(((x) <= VM_MAX_KERNEL_ADDRESS && \
65		(x) >= VM_MIN_KERNEL_ADDRESS) || \
66		(PMAP_HAS_DMAP && (x) >= DMAP_BASE_ADDRESS && \
67		 (x) <= DMAP_MAX_ADDRESS))
68
/*
 * Decide whether the saved stack pointer 'sp' refers to a valid kernel
 * stack frame.  Returns 0 for out-of-kernel or unaligned frames and for
 * trap frames whose trap came from user mode, non-zero otherwise.
 */
static __inline int
dtrace_sp_inkernel(uintptr_t sp)
{
	struct trapframe *frame;
	vm_offset_t callpc;

	/* Not within the kernel, or not aligned. */
	if (!INKERNEL(sp) || (sp & 0xf) != 0)
		return (0);
#ifdef __powerpc64__
	callpc = *(vm_offset_t *)(sp + RETURN_OFFSET64);
#else
	callpc = *(vm_offset_t *)(sp + RETURN_OFFSET);
#endif
	/* Reject a return address that is misaligned or implausibly low. */
	if ((callpc & 3) || (callpc < 0x100))
		return (0);

	/*
	 * trapexit() and asttrapexit() are sentinels
	 * for kernel stack tracing.
	 */
	if (callpc + OFFSET == (vm_offset_t) &trapexit ||
	    callpc + OFFSET == (vm_offset_t) &asttrapexit) {
		/* The trap frame sits just above this frame on the stack. */
		frame = (struct trapframe *)(sp + FRAME_OFFSET);

		/* Kernel frame only if the trap did not come from user mode. */
		return ((frame->srr1 & PSL_PR) == 0);
	}

	return (1);
}
99
/*
 * Given a kernel frame pointer 'sp', return the caller's frame pointer
 * through 'nsp' and the return address through 'pc'.  Either output
 * pointer may be NULL.  When the frame's return address is one of the
 * trap sentinels, unwinding continues through the saved trap frame
 * rather than the ordinary back chain.
 */
static __inline void
dtrace_next_sp_pc(uintptr_t sp, uintptr_t *nsp, uintptr_t *pc)
{
	vm_offset_t callpc;
	struct trapframe *frame;

#ifdef __powerpc64__
	callpc = *(vm_offset_t *)(sp + RETURN_OFFSET64);
#else
	callpc = *(vm_offset_t *)(sp + RETURN_OFFSET);
#endif

	/*
	 * trapexit() and asttrapexit() are sentinels
	 * for kernel stack tracing.
	 */
	if ((callpc + OFFSET == (vm_offset_t) &trapexit ||
	    callpc + OFFSET == (vm_offset_t) &asttrapexit)) {
		/* Access the trap frame */
		frame = (struct trapframe *)(sp + FRAME_OFFSET);

		/* Resume from the trapped context's sp (r1) and pc (srr0). */
		if (nsp != NULL)
			*nsp = frame->fixreg[1];
		if (pc != NULL)
			*pc = frame->srr0;
		return;
	}

	/* Ordinary frame: back chain word is at *sp. */
	if (nsp != NULL)
		*nsp = *(uintptr_t *)sp;
	if (pc != NULL)
		*pc = callpc;
}
133
/*
 * Record the kernel call stack into 'pcstack' (at most 'pcstack_limit'
 * entries).  'aframes' artificial frames (plus this function's own) are
 * skipped; when the last artificial frame is consumed the DTrace caller
 * is substituted if known.  If 'intrpc' is set it becomes the top entry.
 * Unused slots are zeroed.
 */
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
    uint32_t *intrpc)
{
	int depth = 0;
	uintptr_t osp, sp;
	vm_offset_t callpc;
	pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller;

	/* Low sentinel: stacks grow down, so frames must strictly increase. */
	osp = PAGE_SIZE;
	if (intrpc != 0)
		pcstack[depth++] = (pc_t) intrpc;

	/* Account for this function's own frame. */
	aframes++;

	sp = (uintptr_t)__builtin_frame_address(0);

	while (depth < pcstack_limit) {
		/* A non-increasing back chain terminates the walk. */
		if (sp <= osp)
			break;

		if (!dtrace_sp_inkernel(sp))
			break;
		osp = sp;
		dtrace_next_sp_pc(osp, &sp, &callpc);

		if (aframes > 0) {
			/* Still consuming artificial frames. */
			aframes--;
			if ((aframes == 0) && (caller != 0)) {
				pcstack[depth++] = caller;
			}
		}
		else {
			pcstack[depth++] = callpc;
		}
	}

	/* Zero any remaining slots. */
	for (; depth < pcstack_limit; depth++) {
		pcstack[depth] = 0;
	}
}
175
/*
 * Common userland stack walker.  Starting from 'pc'/'sp', follow the
 * user back chain; when 'pcstack' is non-NULL store up to
 * 'pcstack_limit' pcs into it.  Returns the number of frames visited.
 * Reads go through dtrace_fuword32/64 according to the process ABI, so
 * bad addresses flag a fault instead of being taken.
 */
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
    uintptr_t sp)
{
	proc_t *p = curproc;
	int ret = 0;

	ASSERT(pcstack == NULL || pcstack_limit > 0);

	while (pc != 0) {
		ret++;
		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		/* End of the back chain. */
		if (sp == 0)
			break;

		/* Fetch the caller's pc and frame per the process ABI. */
		if (SV_PROC_FLAG(p, SV_ILP32)) {
			pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET));
			sp = dtrace_fuword32((void *)sp);
		}
		else {
			pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64));
			sp = dtrace_fuword64((void *)sp);
		}
	}

	return (ret);
}
209
/*
 * Record the current userland call stack into 'pcstack' (at most
 * 'pcstack_limit' entries).  The first entry is the pid; remaining
 * unused slots are zeroed.  Bails out early if a DTrace fault is
 * already pending on this CPU.
 */
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
	int n;

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (p == NULL || (tf = curthread->td_frame) == NULL)
		goto zero;

	*pcstack++ = (uint64_t)p->p_pid;
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	/* User pc and stack pointer at the time of the trap. */
	pc = tf->srr0;
	sp = tf->fixreg[1];

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * In an entry probe.  The frame pointer has not yet been
		 * pushed (that happens in the function prologue).  The
		 * best approach is to add the current pc as a missing top
		 * of stack and back the pc up to the caller, which is stored
		 * at the current stack pointer address since the call
		 * instruction puts it there right before the branch.
		 */

		*pcstack++ = (uint64_t)pc;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		/* The caller's pc is still in the link register. */
		pc = tf->lr;
	}

	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp);
	ASSERT(n >= 0);
	ASSERT(n <= pcstack_limit);

	pcstack += n;
	pcstack_limit -= n;

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
270
/*
 * Return the number of frames on the current userland stack, 0 when
 * there is no user context, or -1 when a DTrace fault is already
 * pending.  Uses dtrace_getustack_common() in counting-only mode
 * (NULL pcstack).
 */
int
dtrace_getustackdepth(void)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp;
	int n = 0;

	if (p == NULL || (tf = curthread->td_frame) == NULL)
		return (0);

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	/* User pc and stack pointer at the time of the trap. */
	pc = tf->srr0;
	sp = tf->fixreg[1];

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * In an entry probe.  The frame pointer has not yet been
		 * pushed (that happens in the function prologue).  The
		 * best approach is to add the current pc as a missing top
		 * of stack and back the pc up to the caller, which is stored
		 * at the current stack pointer address since the call
		 * instruction puts it there right before the branch.
		 */

		if (SV_PROC_FLAG(p, SV_ILP32)) {
			pc = dtrace_fuword32((void *) sp);
		}
		else
			pc = dtrace_fuword64((void *) sp);
		n++;
	}

	n += dtrace_getustack_common(NULL, 0, pc, sp);

	return (n);
}
310
/*
 * Like dtrace_getupcstack(), but also records the frame pointer of
 * each entry into 'fpstack'.  The first pcstack entry is the pid;
 * unused pcstack slots are zeroed.  The signal-stack handling is
 * compiled out ('notyet').
 */
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
#ifdef notyet	/* XXX signal stack */
	uintptr_t oldcontext;
	size_t s1, s2;
#endif

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (p == NULL || (tf = curthread->td_frame) == NULL)
		goto zero;

	*pcstack++ = (uint64_t)p->p_pid;
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	/* User pc and stack pointer at the time of the trap. */
	pc = tf->srr0;
	sp = tf->fixreg[1];

#ifdef notyet /* XXX signal stack */
	oldcontext = lwp->lwp_oldcontext;
	s1 = sizeof (struct xframe) + 2 * sizeof (long);
	s2 = s1 + sizeof (siginfo_t);
#endif

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * Entry probe: the new frame has not been pushed yet, so
		 * record the current pc (with no frame pointer) and back
		 * the pc up to the caller, stored at the stack pointer.
		 */
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = 0;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		if (SV_PROC_FLAG(p, SV_ILP32)) {
			pc = dtrace_fuword32((void *)sp);
		}
		else {
			pc = dtrace_fuword64((void *)sp);
		}
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = sp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		/* End of the back chain. */
		if (sp == 0)
			break;

#ifdef notyet /* XXX signal stack */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			ucontext_t *ucp = (ucontext_t *)oldcontext;
			greg_t *gregs = ucp->uc_mcontext.gregs;

			sp = dtrace_fulword(&gregs[REG_FP]);
			pc = dtrace_fulword(&gregs[REG_PC]);

			oldcontext = dtrace_fulword(&ucp->uc_link);
		} else
#endif /* XXX */
		{
			/* Fetch caller's pc and frame per the process ABI. */
			if (SV_PROC_FLAG(p, SV_ILP32)) {
				pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET));
				sp = dtrace_fuword32((void *)sp);
			}
			else {
				pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64));
				sp = dtrace_fuword64((void *)sp);
			}
		}

		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
413
/*ARGSUSED*/
/*
 * Fetch probe argument 'arg' for the probe that fired 'aframes' stack
 * frames up from here.  If the walk crosses a trap frame, arguments are
 * recovered from the register state saved at the trap; otherwise they
 * are read from dtrace_probe()'s caller, where the first eight
 * arguments live in r3-r10 and the rest spill to the stack.
 */
uint64_t
dtrace_getarg(int arg, int aframes)
{
	uintptr_t val;
	uintptr_t *fp = (uintptr_t *)__builtin_frame_address(0);
	uintptr_t *stack;
	int i;

	/*
	 * A total of 8 arguments are passed via registers; any argument with
	 * index of 7 or lower is therefore in a register.
	 */
	int inreg = 7;

	for (i = 1; i <= aframes; i++) {
		/* Follow the back chain to the next frame up. */
		fp = (uintptr_t *)*fp;

		/*
		 * On ppc32 trapexit() is the immediately following label.  On
		 * ppc64 AIM trapexit() follows a nop.
		 */
#ifdef __powerpc64__
		if ((long)(fp[2]) + 4 == (long)trapexit) {
#else
		if ((long)(fp[1]) == (long)trapexit) {
#endif
			/*
			 * In the case of powerpc, we will use the pointer to the regs
			 * structure that was pushed when we took the trap.  To get this
			 * structure, we must increment beyond the frame structure.  If the
			 * argument that we're seeking is passed on the stack, we'll pull
			 * the true stack pointer out of the saved registers and decrement
			 * our argument by the number of arguments passed in registers; if
			 * the argument we're seeking is passed in registers, we can just
			 * load it directly.
			 */
#ifdef __powerpc64__
			struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 48);
#else
			struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 8);
#endif

			if (arg <= inreg) {
				/* Register arguments start at r3. */
				stack = &rp->fixreg[3];
			} else {
				/* Stack argument: use the trapped thread's sp. */
				stack = (uintptr_t *)(rp->fixreg[1]);
				arg -= inreg;
			}
			goto load;
		}

	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() -- the provider simply called dtrace_probe()
	 * directly.  As this is the case, we need to shift the argument
	 * that we're looking for:  the probe ID is the first argument to
	 * dtrace_probe(), so the argument n will actually be found where
	 * one would expect to find argument (n + 1).
	 */
	arg++;

	if (arg <= inreg) {
		/*
		 * This shouldn't happen.  If the argument is passed in a
		 * register then it should have been, well, passed in a
		 * register...
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	arg -= (inreg + 1);
	/* Stack argument area begins two words past the back chain. */
	stack = fp + 2;

load:
	/* Guard the dereference; a bad address flags a fault, not a panic. */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	val = stack[arg];
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return (val);
}
498
499int
500dtrace_getstackdepth(int aframes)
501{
502	int depth = 0;
503	uintptr_t osp, sp;
504	vm_offset_t callpc;
505
506	osp = PAGE_SIZE;
507	sp = (uintptr_t)__builtin_frame_address(0);
508	for(;;) {
509		if (sp <= osp)
510			break;
511
512		if (!dtrace_sp_inkernel(sp))
513			break;
514
515		depth++;
516		osp = sp;
517		dtrace_next_sp_pc(sp, &sp, NULL);
518	}
519	if (depth < aframes)
520		return (0);
521
522	return (depth - aframes);
523}
524
525ulong_t
526dtrace_getreg(struct trapframe *rp, uint_t reg)
527{
528	if (reg < 32)
529		return (rp->fixreg[reg]);
530
531	switch (reg) {
532	case 32:
533		return (rp->lr);
534	case 33:
535		return (rp->cr);
536	case 34:
537		return (rp->xer);
538	case 35:
539		return (rp->ctr);
540	case 36:
541		return (rp->srr0);
542	case 37:
543		return (rp->srr1);
544	case 38:
545		return (rp->exc);
546	default:
547		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
548		return (0);
549	}
550}
551
552static int
553dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
554{
555	ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr);
556
557	if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) {
558		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
559		cpu_core[curcpu].cpuc_dtrace_illval = uaddr;
560		return (0);
561	}
562
563	return (1);
564}
565
566void
567dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
568    volatile uint16_t *flags)
569{
570	if (dtrace_copycheck(uaddr, kaddr, size))
571		if (copyin((const void *)uaddr, (void *)kaddr, size)) {
572			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
573			cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
574		}
575}
576
577void
578dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
579    volatile uint16_t *flags)
580{
581	if (dtrace_copycheck(uaddr, kaddr, size)) {
582		if (copyout((const void *)kaddr, (void *)uaddr, size)) {
583			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
584			cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
585		}
586	}
587}
588
589void
590dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
591    volatile uint16_t *flags)
592{
593	size_t actual;
594	int    error;
595
596	if (dtrace_copycheck(uaddr, kaddr, size)) {
597		error = copyinstr((const void *)uaddr, (void *)kaddr,
598		    size, &actual);
599
600		/* ENAMETOOLONG is not a fault condition. */
601		if (error && error != ENAMETOOLONG) {
602			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
603			cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
604		}
605	}
606}
607
608/*
609 * The bulk of this function could be replaced to match dtrace_copyinstr()
610 * if we ever implement a copyoutstr().
611 */
612void
613dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
614    volatile uint16_t *flags)
615{
616	size_t len;
617
618	if (dtrace_copycheck(uaddr, kaddr, size)) {
619		len = strlen((const char *)kaddr);
620		if (len > size)
621			len = size;
622
623		if (copyout((const void *)kaddr, (void *)uaddr, len)) {
624			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
625			cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
626		}
627	}
628}
629
630uint8_t
631dtrace_fuword8(void *uaddr)
632{
633	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
634		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
635		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
636		return (0);
637	}
638	return (fubyte(uaddr));
639}
640
641uint16_t
642dtrace_fuword16(void *uaddr)
643{
644	uint16_t ret = 0;
645
646	if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) {
647		if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) {
648			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
649			cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
650		}
651	}
652	return ret;
653}
654
655uint32_t
656dtrace_fuword32(void *uaddr)
657{
658	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
659		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
660		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
661		return (0);
662	}
663	return (fuword32(uaddr));
664}
665
666uint64_t
667dtrace_fuword64(void *uaddr)
668{
669	uint64_t ret = 0;
670
671	if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) {
672		if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) {
673			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
674			cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
675		}
676	}
677	return ret;
678}
679
680uintptr_t
681dtrace_fulword(void *uaddr)
682{
683	uintptr_t ret = 0;
684
685	if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) {
686		if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) {
687			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
688			cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
689		}
690	}
691	return ret;
692}
693