/*
 * Copyright (c) 2005-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
#include <kern/thread.h>
#include <mach/thread_status.h>

typedef x86_saved_state_t savearea_t;

#include <stdarg.h>
#include <string.h>
#include <sys/malloc.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <libkern/OSAtomic.h>
#include <kern/thread_call.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
#include <miscfs/devfs/devfs.h>
#include <mach/vm_param.h>
#include <machine/pal_routines.h>
#include <i386/mp.h>

/*
 * APPLE NOTE:  The regmap is used to decode which 64-bit uregs[] register
 * is being accessed when passed a 32-bit uregs[] constant (based on
 * the reg.d translator file). dtrace_getreg() is smart enough to handle
 * the register mappings.  The register set definitions are the same as
 * those used by the fasttrap_getreg code.
 */
#include "fasttrap_regset.h"
static const uint8_t regmap[19] = {
    REG_RDI,		/* EDI */
    REG_RSI,		/* ESI */
    REG_RBP,		/* EBP, REG_FP  */
    REG_RSP,		/* ESP */
    REG_RBX,		/* EBX */
    REG_RDX,		/* EDX, REG_R1  */
    REG_RCX,		/* ECX */
    REG_RAX,		/* EAX, REG_R0  */
    REG_TRAPNO,		/* TRAPNO */
    REG_ERR,		/* ERR */
    REG_RIP,		/* EIP, REG_PC  */
    REG_CS,		/* CS */
    REG_RFL,		/* EFL, REG_PS  */
    REG_RSP,		/* UESP, REG_SP */
    REG_SS		/* SS */
};
    REG_GS,		/* GS */
    REG_FS,		/* FS */
    REG_ES,		/* ES */
    REG_DS,		/* DS */
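
/*
 * For example, a 32-bit consumer reference to uregs[EAX] (index 11 in the
 * layout above) maps to REG_RAX, so the value is read from the rax slot of
 * the 64-bit save area.
 */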

extern dtrace_id_t      dtrace_probeid_error;   /* special ERROR probe */

void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fltoffs, int fault, uint64_t illval)
{
    /*
     * For the case of the error probe firing, stash away "illval" here,
     * and special-case retrieving it in DIF_VARIABLE_ARG.
     */
    state->dts_arg_error_illval = illval;
    dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault );
}

/*
 * Atomicity and synchronization
 */
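/*
 * dtrace_membar_producer() issues SFENCE so that all prior stores become
 * globally visible before any later stores; dtrace_membar_consumer()
 * issues LFENCE to order loads on the consuming side.
 */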
void
dtrace_membar_producer(void)
{
	__asm__ volatile("sfence");
}

void
dtrace_membar_consumer(void)
{
	__asm__ volatile("lfence");
}

/*
 * Interrupt manipulation
 * XXX dtrace_getipl() can be called from probe context.
 */
int
dtrace_getipl(void)
{
	/*
	 * XXX Drat, get_interrupt_level is MACH_KERNEL_PRIVATE
	 * in osfmk/kern/cpu_data.h
	 */
	/* return get_interrupt_level(); */
	return (ml_at_interrupt_context() ? 1 : 0);
}

/*
 * MP coordination
 */
typedef struct xcArg {
	processorid_t cpu;
	dtrace_xcall_t f;
	void *arg;
} xcArg_t;

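/*
 * xcRemote() is the thunk handed to mp_cpus_call(); it runs on each target
 * CPU and invokes the caller's function only when that CPU was requested
 * (or DTRACE_CPUALL was specified).
 */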
static void
xcRemote( void *foo )
{
	xcArg_t *pArg = (xcArg_t *)foo;

	if ( pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL ) {
		(pArg->f)(pArg->arg);
	}
}


/*
 * dtrace_xcall() is not called from probe context.
 */
void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
{
	xcArg_t xcArg;

	xcArg.cpu = cpu;
	xcArg.f = f;
	xcArg.arg = arg;

	if (cpu == DTRACE_CPUALL) {
		mp_cpus_call (CPUMASK_ALL, SYNC, xcRemote, (void*)&xcArg);
	}
	else {
		mp_cpus_call (cpu_to_cpumask((cpu_t)cpu), SYNC, xcRemote, (void*)&xcArg);
	}
}

/*
 * Initialization
 */
void
dtrace_isa_init(void)
{
	return;
}

/*
 * Runtime and ABI
 */
uint64_t
dtrace_getreg(struct regs *savearea, uint_t reg)
{
	boolean_t is64Bit = proc_is64bit(current_proc());
	x86_saved_state_t *regs = (x86_saved_state_t *)savearea;

	if (is64Bit) {
		if (reg <= SS) {
			reg = regmap[reg];
		} else {
			reg -= (SS + 1);
		}

		switch (reg) {
		case REG_RDI:
			return (uint64_t)(regs->ss_64.rdi);
		case REG_RSI:
			return (uint64_t)(regs->ss_64.rsi);
		case REG_RDX:
			return (uint64_t)(regs->ss_64.rdx);
		case REG_RCX:
			return (uint64_t)(regs->ss_64.rcx);
		case REG_R8:
			return (uint64_t)(regs->ss_64.r8);
		case REG_R9:
			return (uint64_t)(regs->ss_64.r9);
		case REG_RAX:
			return (uint64_t)(regs->ss_64.rax);
		case REG_RBX:
			return (uint64_t)(regs->ss_64.rbx);
		case REG_RBP:
			return (uint64_t)(regs->ss_64.rbp);
		case REG_R10:
			return (uint64_t)(regs->ss_64.r10);
		case REG_R11:
			return (uint64_t)(regs->ss_64.r11);
		case REG_R12:
			return (uint64_t)(regs->ss_64.r12);
		case REG_R13:
			return (uint64_t)(regs->ss_64.r13);
		case REG_R14:
			return (uint64_t)(regs->ss_64.r14);
		case REG_R15:
			return (uint64_t)(regs->ss_64.r15);
		case REG_FS:
			return (uint64_t)(regs->ss_64.fs);
		case REG_GS:
			return (uint64_t)(regs->ss_64.gs);
		case REG_TRAPNO:
			return (uint64_t)(regs->ss_64.isf.trapno);
		case REG_ERR:
			return (uint64_t)(regs->ss_64.isf.err);
		case REG_RIP:
			return (uint64_t)(regs->ss_64.isf.rip);
		case REG_CS:
			return (uint64_t)(regs->ss_64.isf.cs);
		case REG_SS:
			return (uint64_t)(regs->ss_64.isf.ss);
		case REG_RFL:
			return (uint64_t)(regs->ss_64.isf.rflags);
		case REG_RSP:
			return (uint64_t)(regs->ss_64.isf.rsp);
		case REG_DS:
		case REG_ES:
		default:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}

	} else {	/* is 32-bit user */
		/* beyond register SS */
		if (reg > x86_SAVED_STATE32_COUNT - 1) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
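		/*
		 * The 32-bit save area lays out gs through ss as consecutive
		 * 32-bit words in the same order as the uregs[] indices above,
		 * so the requested register can be fetched by indexing from
		 * the gs field.
		 */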
		return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg];
	}
}

#define RETURN_OFFSET 4
#define RETURN_OFFSET64 8

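/*
 * Walk a user frame-pointer chain starting at (pc, sp), copying up to
 * pcstack_limit return addresses into pcstack (when it is non-NULL) and
 * returning the number of frames visited.  Each frame holds the saved
 * frame pointer at sp and the return address RETURN_OFFSET (32-bit) or
 * RETURN_OFFSET64 (64-bit) bytes above it.
 */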
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc,
    user_addr_t sp)
{
#if 0
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack crawl */
	size_t s1, s2;
#endif
	int ret = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	ASSERT(pcstack == NULL || pcstack_limit > 0);

#if 0 /* XXX signal stack crawl */
	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	while (pc != 0) {
		ret++;
		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
				greg32_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		}
		else
#endif
		{
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

#if 0 /* XXX */
		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
#endif
	}

	return (ret);
}

/*
 * The return value indicates if we've modified the stack.
 */
static int
dtrace_adjust_stack(uint64_t **pcstack, int *pcstack_limit, user_addr_t *pc,
    user_addr_t sp)
{
	int64_t missing_tos;
	int rc = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	ASSERT(pc != NULL);

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * If we found ourselves in an entry probe, the frame pointer
		 * has not yet been pushed (that happens in the function
		 * prologue).  The best approach is to add the current pc as a
		 * missing top of stack, and back the pc up to the caller,
		 * which is stored at the current stack pointer address since
		 * the call instruction puts it there right before the branch.
		 */

		missing_tos = *pc;

		if (is64Bit)
			*pc = dtrace_fuword64(sp);
		else
			*pc = dtrace_fuword32(sp);
	} else {
		/*
		 * We might have a top of stack override, in which case we just
		 * add that frame without question to the top.  This happens in
		 * return probes where you have a valid frame pointer, but it's
		 * for the caller's frame and you'd like to add the pc of the
		 * return site to the frame.
		 */
		missing_tos = cpu_core[CPU->cpu_id].cpuc_missing_tos;
	}

	if (missing_tos != 0) {
		if (pcstack != NULL && pcstack_limit != NULL) {
			/*
			 * If the missing top of stack has been filled out,
			 * then we add it and adjust the size.
			 */
			*(*pcstack)++ = missing_tos;
			(*pcstack_limit)--;
		}
		/*
		 * Return 1 because we would have changed the stack whether or
		 * not it was passed in.  This ensures the stack count is
		 * correct.
		 */
		rc = 1;
	}
	return rc;
}

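/*
 * Capture the current thread's user-mode call stack into pcstack, prefixed
 * with the current pid; any remaining slots up to pcstack_limit are zeroed.
 */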
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	int n;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	pal_register_cache_state(thread, VALID);
	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	/*
	 * The return value indicates if we've modified the stack.
	 * Since there is nothing else to fix up in either case,
	 * we can safely ignore it here.
	 */
	(void)dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp);

	if (pcstack_limit <= 0)
		return;

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */
	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
	ASSERT(n >= 0);
	ASSERT(n <= pcstack_limit);

	pcstack += n;
	pcstack_limit -= n;

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}

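/*
 * Return the depth of the current thread's user stack, or -1 if a fault
 * has already been flagged on this CPU.
 */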
int
dtrace_getustackdepth(void)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	int n = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (thread == NULL)
		return 0;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	pal_register_cache_state(thread, VALID);
	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		return 0;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	if (dtrace_adjust_stack(NULL, NULL, &pc, sp) == 1) {
		/*
		 * We would have adjusted the stack if we had supplied one
		 * (that is what rc == 1 means).  Also, as a side effect, the
		 * pc might have been fixed up, which is good for calling
		 * in to dtrace_getustack_common.
		 */
		n++;
	}

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */

	n += dtrace_getustack_common(NULL, 0, pc, fp);

	return (n);
}

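/*
 * Like dtrace_getupcstack(), but also records a frame pointer value in
 * fpstack for each pc captured.
 */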
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	savearea_t *regs;
	user_addr_t pc, sp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if 0
	uintptr_t oldcontext;
	size_t s1, s2;
#endif
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	regs = (savearea_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = regs->ss_32.eip;
	sp = regs->ss_32.ebp;

#if 0 /* XXX signal stack crawl */
	oldcontext = lwp->lwp_oldcontext;

	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	if (dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp) == 1) {
		/*
		 * We made a change.
		 */
		*fpstack++ = 0;
		if (pcstack_limit <= 0)
			return;
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = sp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		}
		else
#endif
		{
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

#if 0 /* XXX */
		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
#endif
	}

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}

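/*
 * Capture the kernel call stack, skipping the first 'aframes' artificial
 * DTrace frames and, when running on the interrupt stack, hopping back to
 * the interrupted thread's kernel stack.
 */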
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
		  uint32_t *intrpc)
{
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	struct frame *nextfp, *minfp, *stacktop;
	int depth = 0;
	int last = 0;
	uintptr_t pc;
	uintptr_t caller = CPU->cpu_dtrace_caller;
	int on_intr;

	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
	else
		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);

	minfp = fp;

	aframes++;

	if (intrpc != NULL && depth < pcstack_limit)
		pcstack[depth++] = (pc_t)intrpc;

	while (depth < pcstack_limit) {
		nextfp = *(struct frame **)fp;
		pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64);

		if (nextfp <= minfp || nextfp >= stacktop) {
			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

				minfp = (struct frame *)kstack_base;
				stacktop = (struct frame *)(kstack_base + kernel_stack_size);

				on_intr = 0;
				continue;
			}
			/*
			 * This is the last frame we can process; indicate
			 * that we should return after processing this frame.
			 */
			last = 1;
		}

		if (aframes > 0) {
			if (--aframes == 0 && caller != 0) {
				/*
				 * We've just run out of artificial frames,
				 * and we have a valid caller -- fill it in
				 * now.
				 */
				ASSERT(depth < pcstack_limit);
				pcstack[depth++] = (pc_t)caller;
				caller = 0;
			}
		} else {
			if (depth < pcstack_limit)
				pcstack[depth++] = (pc_t)pc;
		}

		if (last) {
			while (depth < pcstack_limit)
				pcstack[depth++] = 0;
			return;
		}

		fp = nextfp;
		minfp = fp;
	}
}

struct frame {
	struct frame *backchain;
	uintptr_t retaddr;
};

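/*
 * Fetch probe argument 'arg' for a probe that fired 'aframes' activation
 * records up the stack.  Under the x86_64 calling convention the first six
 * integer arguments are passed in registers; the remainder are read from
 * the caller's stack.
 */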
uint64_t
dtrace_getarg(int arg, int aframes)
{
	uint64_t val;
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	uintptr_t *stack;
	uintptr_t pc;
	int i;

	/*
	 * A total of 6 arguments are passed via registers; any argument with
	 * index of 5 or lower is therefore in a register.
	 */
	int inreg = 5;

	for (i = 1; i <= aframes; i++) {
		fp = fp->backchain;
		pc = fp->retaddr;

		if (dtrace_invop_callsite_pre != NULL
			&& pc  >  (uintptr_t)dtrace_invop_callsite_pre
			&& pc  <= (uintptr_t)dtrace_invop_callsite_post) {
			/*
			 * In the case of x86_64, we will use the pointer to the
			 * save area structure that was pushed when we took the
			 * trap.  To get this structure, we must increment
			 * beyond the frame structure. If the
			 * argument that we're seeking is passed on the stack,
			 * we'll pull the true stack pointer out of the saved
			 * registers and decrement our argument by the number
			 * of arguments passed in registers; if the argument
			 * we're seeking is passed in registers, we can just
			 * load it directly.
			 */

			/* fp points to frame of dtrace_invop() activation. */
			fp = fp->backchain; /* to fbt_perfcallback() activation. */
			fp = fp->backchain; /* to kernel_trap() activation. */
			fp = fp->backchain; /* to trap_from_kernel() activation. */

			x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)&fp[1];
			x86_saved_state64_t *saved_state = saved_state64(tagged_regs);

			if (arg <= inreg) {
				stack = (uintptr_t *)&saved_state->rdi;
			} else {
				fp = (struct frame *)(saved_state->isf.rsp);
				stack = (uintptr_t *)&fp[1]; /* Find marshalled
								arguments */
				arg -= inreg + 1;
			}
			goto load;
		}
	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() -- we arrive here when the provider has
	 * called dtrace_probe() directly.
	 * The probe ID is the first argument to dtrace_probe().
	 * We must advance beyond that to get the argX.
	 */
	arg++; /* Advance past probeID */

	if (arg <= inreg) {
		/*
		 * This shouldn't happen.  If the argument is passed in a
		 * register then it should have been, well, passed in a
		 * register...
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	arg -= (inreg + 1);
	stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */

load:
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	/* dtrace_probe arguments arg0 ... arg4 are 64 bits wide */
	val = (uint64_t)(*(((uintptr_t *)stack) + arg));
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return (val);
}

/*
 * Load/Store Safety
 */
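/*
 * dtrace_toxic_ranges() reports the address ranges that DIF emulation must
 * never load from or store to: everything below
 * VM_MIN_KERNEL_AND_KEXT_ADDRESS and everything above VM_MAX_KERNEL_ADDRESS.
 */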
void
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
{
	/*
	 * "base" is the smallest toxic address in the range, "limit" is the
	 * first VALID address greater than "base".
	 */
	func(0x0, VM_MIN_KERNEL_AND_KEXT_ADDRESS);
	if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0)
		func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
}