/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#ifdef	MACH_BSD
#include <mach_rt.h>
#include <mach_debug.h>
#include <mach_ldebug.h>

#include <mach/kern_return.h>
#include <mach/mach_traps.h>
#include <mach/thread_status.h>
#include <mach/vm_param.h>

#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/spl.h>
#include <kern/syscall_sw.h>
#include <ipc/ipc_port.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>

#include <i386/cpu_number.h>
#include <i386/eflags.h>
#include <i386/proc_reg.h>
#include <i386/tss.h>
#include <i386/user_ldt.h>
#include <i386/fpu.h>
#include <i386/machdep_call.h>
#include <i386/vmparam.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/thread.h>
#include <i386/trap.h>
#include <i386/seg.h>
#include <mach/i386/syscall_sw.h>
#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/errno.h>
#include <../bsd/sys/sysent.h>

#ifdef MACH_BSD
extern void	mach_kauth_cred_uthread_update(void);
#endif

void * find_user_regs(thread_t);

unsigned int get_msr_exportmask(void);

unsigned int get_msr_nbits(void);

unsigned int get_msr_rbits(void);

extern void throttle_lowpri_io(int);

/*
 * thread_userstack:
 *
 * Return the user stack pointer from the machine
 * dependent thread state info.
 */
kern_return_t
thread_userstack(
	__unused thread_t	thread,
	int			flavor,
	thread_state_t		tstate,
	__unused unsigned int	count,
	mach_vm_offset_t	*user_stack,
	int			*customstack
)
{
	if (customstack)
		*customstack = 0;

	switch (flavor) {
	case x86_THREAD_STATE32:
		{
			x86_thread_state32_t *state32;

			state32 = (x86_thread_state32_t *) tstate;

			if (state32->esp) {
				*user_stack = state32->esp;
				if (customstack)
					*customstack = 1;
			} else {
				*user_stack = VM_USRSTACK32;
				if (customstack)
					*customstack = 0;
			}
			break;
		}

	case x86_THREAD_STATE64:
		{
			x86_thread_state64_t *state64;

			state64 = (x86_thread_state64_t *) tstate;

			if (state64->rsp) {
				*user_stack = state64->rsp;
				if (customstack)
					*customstack = 1;
			} else {
				*user_stack = VM_USRSTACK64;
				if (customstack)
					*customstack = 0;
			}
			break;
		}

	default:
		return (KERN_INVALID_ARGUMENT);
	}

	return (KERN_SUCCESS);
}
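
/*
 * Illustrative sketch (editorial, not part of the original source): a
 * caller in the exec/stack-setup path might use thread_userstack() as
 * below to choose between a caller-supplied stack and the platform
 * default.  The local variable names here are hypothetical.
 *
 *	x86_thread_state64_t ts = { 0 };	// rsp == 0: no custom stack
 *	mach_vm_offset_t stack = 0;
 *	int custom = 0;
 *
 *	if (thread_userstack(thread, x86_THREAD_STATE64, (thread_state_t)&ts,
 *		x86_THREAD_STATE64_COUNT, &stack, &custom) == KERN_SUCCESS) {
 *		// stack == VM_USRSTACK64 and custom == 0, since ts.rsp was 0
 *	}
 */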

/*
 * thread_userstackdefault:
 *
 * Return the default stack location for the
 * thread, if otherwise unknown.
 */
kern_return_t
thread_userstackdefault(
	thread_t thread,
	mach_vm_offset_t *default_user_stack)
{
	if (thread_is_64bit(thread)) {
		*default_user_stack = VM_USRSTACK64;
	} else {
		*default_user_stack = VM_USRSTACK32;
	}
	return (KERN_SUCCESS);
}

kern_return_t
thread_entrypoint(
	__unused thread_t	thread,
	int			flavor,
	thread_state_t		tstate,
	__unused unsigned int	count,
	mach_vm_offset_t	*entry_point
)
{
	/*
	 * Set a default.
	 */
	if (*entry_point == 0)
		*entry_point = VM_MIN_ADDRESS;

	switch (flavor) {
	case x86_THREAD_STATE32:
		{
			x86_thread_state32_t *state32;

			state32 = (x86_thread_state32_t *) tstate;
			*entry_point = state32->eip ? state32->eip : VM_MIN_ADDRESS;
			break;
		}

	case x86_THREAD_STATE64:
		{
			x86_thread_state64_t *state64;

			state64 = (x86_thread_state64_t *) tstate;
			*entry_point = state64->rip ? state64->rip : VM_MIN_ADDRESS64;
			break;
		}
	}
	return (KERN_SUCCESS);
}

/*
 * FIXME - thread_set_child
 *
 * Set up the fork() return state seen by the child: pid in eax/rax,
 * 1 in edx/rdx (marking this as the child's return), carry flag clear.
 */

void thread_set_child(thread_t child, int pid);
void
thread_set_child(thread_t child, int pid)
{
	pal_register_cache_state(child, DIRTY);

	if (thread_is_64bit(child)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(child);

		iss64->rax = pid;
		iss64->rdx = 1;
		iss64->isf.rflags &= ~EFL_CF;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(child);

		iss32->eax = pid;
		iss32->edx = 1;
		iss32->efl &= ~EFL_CF;
	}
}
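
/*
 * Sketch of the convention (editorial note, not original source): after
 * fork(), the user-mode trampoline distinguishes the two returns by the
 * secondary return register that thread_set_child() sets to 1:
 *
 *	// pseudo-C for the user-mode check
 *	if (edx != 0)
 *		return 0;	// child: fork() returns 0
 *	else
 *		return eax;	// parent: fork() returns the child's pid
 */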


/*
 * System Call handling code
 */

extern long fuword(vm_offset_t);

void
machdep_syscall(x86_saved_state_t *state)
{
	int			args[machdep_call_count];
	int			trapno;
	int			nargs;
	const machdep_call_t	*entry;
	x86_saved_state32_t	*regs;

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	trapno = regs->eax;
#if DEBUG_TRACE
	kprintf("machdep_syscall(%p) code=%d\n", regs, trapno);
#endif

	DEBUG_KPRINT_SYSCALL_MDEP(
		"machdep_syscall: trapno=%d\n", trapno);

	if (trapno < 0 || trapno >= machdep_call_count) {
		regs->eax = (unsigned int)kern_invalid(NULL);

		thread_exception_return();
		/* NOTREACHED */
	}
	entry = &machdep_call_table[trapno];
	nargs = entry->nargs;

	if (nargs != 0) {
		if (copyin((user_addr_t) regs->uesp + sizeof (int),
				(char *) args, (nargs * sizeof (int)))) {
			regs->eax = KERN_INVALID_ADDRESS;

			thread_exception_return();
			/* NOTREACHED */
		}
	}
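
	/*
	 * Layout sketch (editorial, not original source): for a 32-bit
	 * machdep call with two int arguments, the user stack at trap time
	 * looks like
	 *
	 *	uesp + 0:	return address (skipped by + sizeof (int) above)
	 *	uesp + 4:	args[0]
	 *	uesp + 8:	args[1]
	 *
	 * which is why the copyin above starts one int past uesp.
	 */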
	switch (nargs) {
	case 0:
		regs->eax = (*entry->routine.args_0)();
		break;
	case 1:
		regs->eax = (*entry->routine.args_1)(args[0]);
		break;
	case 2:
		regs->eax = (*entry->routine.args_2)(args[0], args[1]);
		break;
	case 3:
		if (!entry->bsd_style)
			regs->eax = (*entry->routine.args_3)(args[0], args[1], args[2]);
		else {
			int		error;
			uint32_t	rval;

			error = (*entry->routine.args_bsd_3)(&rval, args[0], args[1], args[2]);
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else {
				regs->eax = rval;
				regs->efl &= ~EFL_CF;
			}
		}
		break;
	case 4:
		regs->eax = (*entry->routine.args_4)(args[0], args[1], args[2], args[3]);
		break;

	default:
		panic("machdep_syscall: too many args");
	}
	if (current_thread()->funnel_lock)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs->eax);

	throttle_lowpri_io(TRUE);

	thread_exception_return();
	/* NOTREACHED */
}


void
machdep_syscall64(x86_saved_state_t *state)
{
	int			trapno;
	const machdep_call_t	*entry;
	x86_saved_state64_t	*regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	trapno = (int)(regs->rax & SYSCALL_NUMBER_MASK);

	DEBUG_KPRINT_SYSCALL_MDEP(
		"machdep_syscall64: trapno=%d\n", trapno);

	if (trapno < 0 || trapno >= machdep_call_count) {
		regs->rax = (unsigned int)kern_invalid(NULL);

		thread_exception_return();
		/* NOTREACHED */
	}
	entry = &machdep_call_table64[trapno];

	switch (entry->nargs) {
	case 0:
		regs->rax = (*entry->routine.args_0)();
		break;
	case 1:
		regs->rax = (*entry->routine.args64_1)(regs->rdi);
		break;
	default:
		panic("machdep_syscall64: too many args");
	}
	if (current_thread()->funnel_lock)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall64: retval=%llu\n", regs->rax);

	throttle_lowpri_io(TRUE);

	thread_exception_return();
	/* NOTREACHED */
}

#endif	/* MACH_BSD */


typedef kern_return_t (*mach_call_t)(void *);

struct mach_call_args {
	syscall_arg_t arg1;
	syscall_arg_t arg2;
	syscall_arg_t arg3;
	syscall_arg_t arg4;
	syscall_arg_t arg5;
	syscall_arg_t arg6;
	syscall_arg_t arg7;
	syscall_arg_t arg8;
	syscall_arg_t arg9;
};

static kern_return_t
mach_call_arg_munger32(uint32_t sp, int nargs, int call_number, struct mach_call_args *args);


static kern_return_t
mach_call_arg_munger32(uint32_t sp, int nargs, int call_number, struct mach_call_args *args)
{
	unsigned int args32[9];

	if (copyin((user_addr_t)(sp + sizeof(int)), (char *)args32, nargs * sizeof (int)))
		return KERN_INVALID_ARGUMENT;

	/* Deliberate fall-through: copy the first nargs words. */
	switch (nargs) {
	case 9: args->arg9 = args32[8];
	case 8: args->arg8 = args32[7];
	case 7: args->arg7 = args32[6];
	case 6: args->arg6 = args32[5];
	case 5: args->arg5 = args32[4];
	case 4: args->arg4 = args32[3];
	case 3: args->arg3 = args32[2];
	case 2: args->arg2 = args32[1];
	case 1: args->arg1 = args32[0];
	}
	if (call_number == 10) {
		/* munge the mach_vm_size_t for mach_vm_allocate() */
		args->arg3 = (((uint64_t)(args32[2])) | ((((uint64_t)(args32[3]))<<32)));
		args->arg4 = args32[4];
	} else if (call_number == 12) {
		/* munge the mach_vm_address_t and mach_vm_size_t for mach_vm_deallocate() */
		args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32)));
		args->arg3 = (((uint64_t)(args32[3])) | ((((uint64_t)(args32[4]))<<32)));
	} else if (call_number == 14) {
		/* munge the mach_vm_address_t and mach_vm_size_t for mach_vm_protect() */
		args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32)));
		args->arg3 = (((uint64_t)(args32[3])) | ((((uint64_t)(args32[4]))<<32)));
		args->arg4 = args32[5];
		args->arg5 = args32[6];
	} else if (call_number == 90) {
		/* munge_l for mach_wait_until_trap() */
		args->arg1 = (((uint64_t)(args32[0])) | ((((uint64_t)(args32[1]))<<32)));
	} else if (call_number == 93) {
		/* munge_wl for mk_timer_arm_trap() */
		args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32)));
	}

	return KERN_SUCCESS;
}
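
/*
 * Packing example (editorial sketch, not original source): a 64-bit
 * quantity such as 0x1122334455667788 supplied by a 32-bit caller
 * arrives as two stack words, low half first:
 *
 *	args32[n]	= 0x55667788	// low 32 bits
 *	args32[n + 1]	= 0x11223344	// high 32 bits
 *
 * and is reassembled above as
 *
 *	((uint64_t)args32[n]) | (((uint64_t)args32[n + 1]) << 32)
 */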


__private_extern__ void mach_call_munger(x86_saved_state_t *state);

extern const char *mach_syscall_name_table[];

void
mach_call_munger(x86_saved_state_t *state)
{
	int argc;
	int call_number;
	mach_call_t mach_call;
	kern_return_t retval;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state32_t	*regs;

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	call_number = -(regs->eax);
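
	/*
	 * Convention note (editorial, not original source): 32-bit user code
	 * requests Mach trap N by loading eax with -N before trapping into
	 * the kernel, so the negation above recovers a non-negative index
	 * into mach_trap_table.
	 */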

	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger: code=%d(%s)\n",
		call_number, mach_syscall_name_table[call_number]);
#if DEBUG_TRACE
	kprintf("mach_call_munger(%p) code=%d\n", regs, call_number);
#endif

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		DEBUG_KPRINT_SYSCALL_MACH(
			"mach_call_munger: kern_invalid 0x%x\n", regs->eax);
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}

	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		retval = mach_call_arg_munger32(regs->uesp, argc, call_number, &args);
		if (retval != KERN_SUCCESS) {
			regs->eax = retval;

			DEBUG_KPRINT_SYSCALL_MACH(
				"mach_call_munger: retval=0x%x\n", retval);

			thread_exception_return();
			/* NOTREACHED */
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
		args.arg1, args.arg2, args.arg3, args.arg4, 0);

	retval = mach_call(&args);

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_END,
		retval, 0, 0, 0, 0);

	regs->eax = retval;

	throttle_lowpri_io(TRUE);

	thread_exception_return();
	/* NOTREACHED */
}


__private_extern__ void mach_call_munger64(x86_saved_state_t *state);

void
mach_call_munger64(x86_saved_state_t *state)
{
	int call_number;
	int argc;
	mach_call_t mach_call;
	x86_saved_state64_t	*regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK);

	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger64: code=%d(%s)\n",
		call_number, mach_syscall_name_table[call_number]);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
		regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	argc = mach_trap_table[call_number].mach_trap_arg_count;

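	/*
	 * Convention sketch (editorial, not original source): 64-bit Mach
	 * traps pass their first six arguments in rdi, rsi, rdx, r10, r8 and
	 * r9 (r10 stands in for rcx, which the syscall instruction clobbers);
	 * anything past six is copied in from the user stack below, which is
	 * why mach_call can be handed &regs->rdi as one contiguous block.
	 */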
	if (argc > 6) {
		int copyin_count;

		copyin_count = (argc - 6) * (int)sizeof(uint64_t);

		if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count)) {
			regs->rax = KERN_INVALID_ARGUMENT;

			thread_exception_return();
			/* NOTREACHED */
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	regs->rax = (uint64_t)mach_call((void *)(&regs->rdi));

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger64: retval=0x%llx\n", regs->rax);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_END,
		regs->rax, 0, 0, 0, 0);

	throttle_lowpri_io(TRUE);

	thread_exception_return();
	/* NOTREACHED */
}


/*
 * thread_setuserstack:
 *
 * Sets the user stack pointer into the machine
 * dependent thread state info.
 */
void
thread_setuserstack(
	thread_t		thread,
	mach_vm_address_t	user_stack)
{
	pal_register_cache_state(thread, DIRTY);
	if (thread_is_64bit(thread)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(thread);

		iss64->isf.rsp = (uint64_t)user_stack;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(thread);

		iss32->uesp = CAST_DOWN_EXPLICIT(unsigned int, user_stack);
	}
}

/*
 * thread_adjuserstack:
 *
 * Returns the adjusted user stack pointer from the machine
 * dependent thread state info.  Used for small (< 2G) deltas.
 */
uint64_t
thread_adjuserstack(
	thread_t	thread,
	int		adjust)
{
	pal_register_cache_state(thread, DIRTY);
	if (thread_is_64bit(thread)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(thread);

		iss64->isf.rsp += adjust;

		return iss64->isf.rsp;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(thread);

		iss32->uesp += adjust;

		return CAST_USER_ADDR_T(iss32->uesp);
	}
}
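
/*
 * Usage sketch (editorial, not original source): to reserve 16 bytes on
 * a thread's user stack and learn where the reservation starts, a caller
 * could do
 *
 *	uint64_t base = thread_adjuserstack(thread, -16);
 *	// a subsequent copyout(src, base, 16) would populate those bytes
 *
 * since the function applies the signed delta and returns the new
 * stack pointer.
 */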

/*
 * thread_setentrypoint:
 *
 * Sets the user PC into the machine
 * dependent thread state info.
 */
void
thread_setentrypoint(thread_t thread, mach_vm_address_t entry)
{
	pal_register_cache_state(thread, DIRTY);
	if (thread_is_64bit(thread)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(thread);

		iss64->isf.rip = (uint64_t)entry;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(thread);

		iss32->eip = CAST_DOWN_EXPLICIT(unsigned int, entry);
	}
}


kern_return_t
thread_setsinglestep(thread_t thread, int on)
{
	pal_register_cache_state(thread, DIRTY);
	if (thread_is_64bit(thread)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(thread);

		if (on)
			iss64->isf.rflags |= EFL_TF;
		else
			iss64->isf.rflags &= ~EFL_TF;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(thread);

		if (on) {
			iss32->efl |= EFL_TF;
			/* Ensure the return to user space is via IRET,
			 * so the trap flag takes effect */
			if (iss32->cs == SYSENTER_CS)
				iss32->cs = SYSENTER_TF_CS;
		} else
			iss32->efl &= ~EFL_TF;
	}

	return (KERN_SUCCESS);
}
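
/*
 * Usage sketch (editorial, not original source): a debugger stepping a
 * suspended thread might do
 *
 *	thread_setsinglestep(thread, TRUE);	// set EFL_TF
 *	thread_resume(thread);
 *	// the thread executes one instruction, then takes a debug
 *	// exception that is delivered as an exception message
 *	thread_setsinglestep(thread, FALSE);	// clear TF before continuing
 */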


/* XXX this should be a struct savearea so that CHUD will work better on x86 */
void *
find_user_regs(thread_t thread)
{
	pal_register_cache_state(thread, DIRTY);
	return USER_STATE(thread);
}

void *
get_user_regs(thread_t th)
{
	pal_register_cache_state(th, DIRTY);
	return USER_STATE(th);
}

#if CONFIG_DTRACE
/*
 * DTrace would like to have a peek at the kernel interrupt state, if
 * available.  Based on osfmk/chud/i386/chud_thread_i386.c:
 * chudxnu_thread_get_state(), which see.
 */
x86_saved_state_t *find_kern_regs(thread_t);

x86_saved_state_t *
find_kern_regs(thread_t thread)
{
	if (thread == current_thread() &&
	    NULL != current_cpu_datap()->cpu_int_state &&
	    !(USER_STATE(thread) == current_cpu_datap()->cpu_int_state &&
	      current_cpu_datap()->cpu_interrupt_level == 1)) {

		return current_cpu_datap()->cpu_int_state;
	} else {
		return NULL;
	}
}

vm_offset_t dtrace_get_cpu_int_stack_top(void);

vm_offset_t
dtrace_get_cpu_int_stack_top(void)
{
	return current_cpu_datap()->cpu_int_stack_top;
}
#endif