/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#ifdef	MACH_BSD
#include <mach_rt.h>
#include <mach_debug.h>
#include <mach_ldebug.h>

#include <mach/kern_return.h>
#include <mach/mach_traps.h>
#include <mach/thread_status.h>
#include <mach/vm_param.h>

#include <kern/counters.h>
#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/spl.h>
#include <kern/syscall_sw.h>
#include <ipc/ipc_port.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>

#include <i386/cpu_number.h>
#include <i386/eflags.h>
#include <i386/proc_reg.h>
#include <i386/tss.h>
#include <i386/user_ldt.h>
#include <i386/fpu.h>
#include <i386/machdep_call.h>
#include <i386/vmparam.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/thread.h>
#include <i386/trap.h>
#include <i386/seg.h>
#include <mach/i386/syscall_sw.h>
#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/errno.h>
#include <../bsd/sys/sysent.h>

#ifdef MACH_BSD
extern void	mach_kauth_cred_uthread_update(void);
extern void throttle_lowpri_io(int);
#endif

void * find_user_regs(thread_t);

unsigned int get_msr_exportmask(void);

unsigned int get_msr_nbits(void);

unsigned int get_msr_rbits(void);

/*
 * thread_userstack:
 *
 * Return the user stack pointer from the machine
 * dependent thread state info.
 */
kern_return_t
thread_userstack(
    __unused thread_t   thread,
    int                 flavor,
    thread_state_t      tstate,
    __unused unsigned int        count,
    mach_vm_offset_t    *user_stack,
    int                 *customstack
)
{
	if (customstack)
		*customstack = 0;

	switch (flavor) {
	case x86_THREAD_STATE32:
		{
			x86_thread_state32_t *state32;

			state32 = (x86_thread_state32_t *) tstate;

			if (state32->esp) {
				*user_stack = state32->esp;
				if (customstack)
					*customstack = 1;
			} else {
				*user_stack = VM_USRSTACK32;
				if (customstack)
					*customstack = 0;
			}
			break;
		}

	case x86_THREAD_STATE64:
		{
			x86_thread_state64_t *state64;

			state64 = (x86_thread_state64_t *) tstate;

			if (state64->rsp) {
				*user_stack = state64->rsp;
				if (customstack)
					*customstack = 1;
			} else {
				*user_stack = VM_USRSTACK64;
				if (customstack)
					*customstack = 0;
			}
			break;
		}

	default:
		return (KERN_INVALID_ARGUMENT);
	}

	return (KERN_SUCCESS);
}
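
/*
 * A nonzero esp/rsp in the supplied state marks a caller-provided
 * ("custom") stack; a zero value selects the default top of stack.
 * E.g. a 64-bit thread with no preset stack yields (VM_USRSTACK64,
 * customstack == 0), while a thread created from a user-supplied
 * x86_THREAD_STATE64 with rsp filled in reports that rsp and
 * customstack == 1.
 */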

/*
 * thread_userstackdefault:
 *
 * Return the default stack location for the
 * thread, if otherwise unknown.
 */
kern_return_t
thread_userstackdefault(
	thread_t thread,
	mach_vm_offset_t *default_user_stack)
{
	if (thread_is_64bit(thread)) {
		*default_user_stack = VM_USRSTACK64;
	} else {
		*default_user_stack = VM_USRSTACK32;
	}
	return (KERN_SUCCESS);
}

kern_return_t
thread_entrypoint(
    __unused thread_t   thread,
    int                 flavor,
    thread_state_t      tstate,
    __unused unsigned int        count,
    mach_vm_offset_t    *entry_point
)
{
	/*
	 * Set a default.
	 */
	if (*entry_point == 0)
		*entry_point = VM_MIN_ADDRESS;

	switch (flavor) {
	case x86_THREAD_STATE32:
		{
			x86_thread_state32_t *state32;

			state32 = (x86_thread_state32_t *) tstate;
			*entry_point = state32->eip ? state32->eip : VM_MIN_ADDRESS;
			break;
		}

	case x86_THREAD_STATE64:
		{
			x86_thread_state64_t *state64;

			state64 = (x86_thread_state64_t *) tstate;
			*entry_point = state64->rip ? state64->rip : VM_MIN_ADDRESS64;
			break;
		}
	}
	return (KERN_SUCCESS);
}

/*
 * FIXME - thread_set_child
 */
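
/*
 * Sketch of the fork return convention assumed here: the kernel hands
 * the child (pid, 1) in (eax|rax, edx|rdx) with the carry flag cleared
 * for success, while the parent's own trap return carries
 * (child_pid, 0).  The user-level fork() stub inspects edx/rdx to tell
 * the two returns apart and converts the pair into the familiar
 * "0 in the child" result.
 */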

void thread_set_child(thread_t child, int pid);
void
thread_set_child(thread_t child, int pid)
{
	pal_register_cache_state(child, DIRTY);

	if (thread_is_64bit(child)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(child);

		iss64->rax = pid;
		iss64->rdx = 1;
		iss64->isf.rflags &= ~EFL_CF;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(child);

		iss32->eax = pid;
		iss32->edx = 1;
		iss32->efl &= ~EFL_CF;
	}
}



/*
 * System Call handling code
 */

extern long fuword(vm_offset_t);



void
machdep_syscall(x86_saved_state_t *state)
{
	int			args[machdep_call_count];
	int			trapno;
	int			nargs;
	const machdep_call_t	*entry;
	x86_saved_state32_t	*regs;

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	trapno = regs->eax;
#if DEBUG_TRACE
	kprintf("machdep_syscall(%p) code=%d\n", (void *) regs, trapno);
#endif

	DEBUG_KPRINT_SYSCALL_MDEP(
		"machdep_syscall: trapno=%d\n", trapno);

	if (trapno < 0 || trapno >= machdep_call_count) {
		regs->eax = (unsigned int)kern_invalid(NULL);

		thread_exception_return();
		/* NOTREACHED */
	}
	entry = &machdep_call_table[trapno];
	nargs = entry->nargs;

	if (nargs != 0) {
		if (copyin((user_addr_t) regs->uesp + sizeof (int),
				(char *) args, (nargs * sizeof (int)))) {
			regs->eax = KERN_INVALID_ADDRESS;

			thread_exception_return();
			/* NOTREACHED */
		}
	}
	switch (nargs) {
	case 0:
		regs->eax = (*entry->routine.args_0)();
		break;
	case 1:
		regs->eax = (*entry->routine.args_1)(args[0]);
		break;
	case 2:
		regs->eax = (*entry->routine.args_2)(args[0], args[1]);
		break;
	case 3:
		if (!entry->bsd_style)
			regs->eax = (*entry->routine.args_3)(args[0], args[1], args[2]);
		else {
			int	error;
			uint32_t	rval;

			error = (*entry->routine.args_bsd_3)(&rval, args[0], args[1], args[2]);
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else {
				regs->eax = rval;
				regs->efl &= ~EFL_CF;
			}
		}
		break;
	case 4:
		regs->eax = (*entry->routine.args_4)(args[0], args[1], args[2], args[3]);
		break;

	default:
		panic("machdep_syscall: too many args");
	}

	DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs->eax);

	throttle_lowpri_io(1);

	thread_exception_return();
	/* NOTREACHED */
}
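
/*
 * User-side sketch (32-bit): machdep calls arrive through their own
 * trap gate (MACHDEP_INT, 0x82, in mach/i386/syscall_sw.h) with the
 * call number in %eax and the arguments on the user stack, roughly:
 *
 *	movl	$3, %eax	# call number (illustrative)
 *	int	$0x82
 *
 * The "+ sizeof (int)" in the copyin above skips the return address
 * the user stub pushed before trapping.
 */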


void
machdep_syscall64(x86_saved_state_t *state)
{
	int			trapno;
	const machdep_call_t	*entry;
	x86_saved_state64_t	*regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	trapno = (int)(regs->rax & SYSCALL_NUMBER_MASK);

	DEBUG_KPRINT_SYSCALL_MDEP(
		"machdep_syscall64: trapno=%d\n", trapno);

	if (trapno < 0 || trapno >= machdep_call_count) {
		regs->rax = (unsigned int)kern_invalid(NULL);

		thread_exception_return();
		/* NOTREACHED */
	}
	entry = &machdep_call_table64[trapno];

	switch (entry->nargs) {
	case 0:
		regs->rax = (*entry->routine.args_0)();
		break;
	case 1:
		regs->rax = (*entry->routine.args64_1)(regs->rdi);
		break;
	case 2:
		regs->rax = (*entry->routine.args64_2)(regs->rdi, regs->rsi);
		break;
	default:
		panic("machdep_syscall64: too many args");
	}

	DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall64: retval=%llu\n", regs->rax);

	throttle_lowpri_io(1);

	thread_exception_return();
	/* NOTREACHED */
}

#endif	/* MACH_BSD */


typedef kern_return_t (*mach_call_t)(void *);

struct mach_call_args {
	syscall_arg_t arg1;
	syscall_arg_t arg2;
	syscall_arg_t arg3;
	syscall_arg_t arg4;
	syscall_arg_t arg5;
	syscall_arg_t arg6;
	syscall_arg_t arg7;
	syscall_arg_t arg8;
	syscall_arg_t arg9;
};
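
/*
 * Nine slots cover the widest Mach trap (see the assert(argc <= 9)
 * in mach_call_munger64 below); keeping the block a fixed size lets
 * every trap function be invoked uniformly through mach_call_t.
 */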

static kern_return_t
mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap_t *trapp);


static kern_return_t
mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap_t *trapp)
{
	if (copyin((user_addr_t)(sp + sizeof(int)), (char *)args, trapp->mach_trap_u32_words * sizeof (int)))
		return KERN_INVALID_ARGUMENT;
#if CONFIG_REQUIRES_U32_MUNGING
	trapp->mach_trap_arg_munge32(args);
#else
#error U32 mach traps on x86_64 kernel require munging
#endif
	return KERN_SUCCESS;
}
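
/*
 * Munging sketch: a 32-bit caller pushes mach_trap_u32_words 32-bit
 * words, which the copyin above lands packed at the front of 'args'.
 * The per-trap munger then widens them in place into 64-bit
 * syscall_arg_t slots (pairing the halves of any 64-bit argument),
 * working from the last argument backwards so no slot is overwritten
 * before it is read.
 */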


__private_extern__ void mach_call_munger(x86_saved_state_t *state);

extern const char *mach_syscall_name_table[];

void
mach_call_munger(x86_saved_state_t *state)
{
	int argc;
	int call_number;
	mach_call_t mach_call;
	kern_return_t retval;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state32_t	*regs;

	assert(is_saved_state32(state));
	regs = saved_state32(state);

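	/*
	 * Mach traps are invoked with the negated trap number in %eax
	 * (positive numbers are BSD syscalls), so a user stub loads
	 * e.g. movl $-31, %eax for mach_msg_trap before trapping;
	 * negate it back to index the trap table.
	 */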
	call_number = -(regs->eax);

	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger: code=%d(%s)\n",
		call_number, mach_syscall_name_table[call_number]);
#if DEBUG_TRACE
	kprintf("mach_call_munger(%p) code=%d\n", (void *) regs, call_number);
#endif

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		DEBUG_KPRINT_SYSCALL_MACH(
			"mach_call_munger: kern_invalid 0x%x\n", regs->eax);
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}

	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		retval = mach_call_arg_munger32(regs->uesp, &args, &mach_trap_table[call_number]);
		if (retval != KERN_SUCCESS) {
			regs->eax = retval;

			DEBUG_KPRINT_SYSCALL_MACH(
				"mach_call_munger: retval=0x%x\n", retval);

			thread_exception_return();
			/* NOTREACHED */
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
		args.arg1, args.arg2, args.arg3, args.arg4, 0);

	retval = mach_call(&args);

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_END,
		retval, 0, 0, 0, 0);

	regs->eax = retval;

	throttle_lowpri_io(1);

	thread_exception_return();
	/* NOTREACHED */
}
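
/*
 * For reference, a 32-bit user-side Mach trap stub amounts to
 * (sketch; gate and trampoline per mach/i386/syscall_sw.h):
 *
 *	movl	$-26, %eax	# mach_reply_port
 *	int	$0x81		# MACH_INT (or the sysenter trampoline)
 *	ret
 */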


__private_extern__ void mach_call_munger64(x86_saved_state_t *state);

void
mach_call_munger64(x86_saved_state_t *state)
{
	int call_number;
	int argc;
	mach_call_t mach_call;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state64_t	*regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK);

	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger64: code=%d(%s)\n",
		call_number, mach_syscall_name_table[call_number]);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
		regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		int args_in_regs = MIN(6, argc);

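		/*
		 * The saved-state layout keeps rdi, rsi, rdx, r10, r8
		 * and r9 contiguous, in System V AMD64 argument order
		 * (r10 stands in for rcx, which the syscall instruction
		 * clobbers with the return RIP), so the first six
		 * arguments lift out with a single memcpy.
		 */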
		memcpy(&args.arg1, &regs->rdi, args_in_regs * sizeof(syscall_arg_t));

		if (argc > 6) {
			int copyin_count;

			assert(argc <= 9);
			copyin_count = (argc - 6) * (int)sizeof(syscall_arg_t);

			if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&args.arg7, copyin_count)) {
				regs->rax = KERN_INVALID_ARGUMENT;

				thread_exception_return();
				/* NOTREACHED */
			}
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	regs->rax = (uint64_t)mach_call((void *)&args);

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger64: retval=0x%llx\n", regs->rax);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_END,
		regs->rax, 0, 0, 0, 0);

	throttle_lowpri_io(1);

	thread_exception_return();
	/* NOTREACHED */
}


/*
 * thread_setuserstack:
 *
 * Sets the user stack pointer into the machine
 * dependent thread state info.
 */
void
thread_setuserstack(
	thread_t	thread,
	mach_vm_address_t	user_stack)
{
	pal_register_cache_state(thread, DIRTY);
	if (thread_is_64bit(thread)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(thread);

		iss64->isf.rsp = (uint64_t)user_stack;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(thread);

		iss32->uesp = CAST_DOWN_EXPLICIT(unsigned int, user_stack);
	}
}

/*
 * thread_adjuserstack:
 *
 * Returns the adjusted user stack pointer from the machine
 * dependent thread state info.  Used for small (<2G) deltas.
 */
uint64_t
thread_adjuserstack(
	thread_t	thread,
	int		adjust)
{
	pal_register_cache_state(thread, DIRTY);
	if (thread_is_64bit(thread)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(thread);

		iss64->isf.rsp += adjust;

		return iss64->isf.rsp;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(thread);

		iss32->uesp += adjust;

		return CAST_USER_ADDR_T(iss32->uesp);
	}
}

/*
 * thread_setentrypoint:
 *
 * Sets the user PC into the machine
 * dependent thread state info.
 */
void
thread_setentrypoint(thread_t thread, mach_vm_address_t entry)
{
	pal_register_cache_state(thread, DIRTY);
	if (thread_is_64bit(thread)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(thread);

		iss64->isf.rip = (uint64_t)entry;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(thread);

		iss32->eip = CAST_DOWN_EXPLICIT(unsigned int, entry);
	}
}


kern_return_t
thread_setsinglestep(thread_t thread, int on)
{
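	/*
	 * Single-stepping is driven by the trap flag (EFL_TF) in the
	 * saved user eflags/rflags.  On the 32-bit sysenter return
	 * path the restored flags would not take effect, so the code
	 * selector is switched to SYSENTER_TF_CS to force the slower
	 * iret exit, which does reload a full eflags image.
	 */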
	pal_register_cache_state(thread, DIRTY);
	if (thread_is_64bit(thread)) {
		x86_saved_state64_t	*iss64;

		iss64 = USER_REGS64(thread);

		if (on)
			iss64->isf.rflags |= EFL_TF;
		else
			iss64->isf.rflags &= ~EFL_TF;
	} else {
		x86_saved_state32_t	*iss32;

		iss32 = USER_REGS32(thread);

		if (on) {
			iss32->efl |= EFL_TF;
			/* Ensure IRET */
			if (iss32->cs == SYSENTER_CS)
				iss32->cs = SYSENTER_TF_CS;
		} else
			iss32->efl &= ~EFL_TF;
	}

	return (KERN_SUCCESS);
}



/* XXX this should be a struct savearea so that CHUD will work better on x86 */
void *
find_user_regs(thread_t thread)
{
	pal_register_cache_state(thread, DIRTY);
	return USER_STATE(thread);
}

void *
get_user_regs(thread_t th)
{
	pal_register_cache_state(th, DIRTY);
	return USER_STATE(th);
}

#if CONFIG_DTRACE
/*
 * DTrace would like to have a peek at the kernel interrupt state, if available.
 * Based on chudxnu_thread_get_state() in osfmk/chud/i386/chud_thread_i386.c.
 */
x86_saved_state_t *find_kern_regs(thread_t);

x86_saved_state_t *
find_kern_regs(thread_t thread)
{
	if (thread == current_thread() &&
		NULL != current_cpu_datap()->cpu_int_state &&
		!(USER_STATE(thread) == current_cpu_datap()->cpu_int_state &&
		  current_cpu_datap()->cpu_interrupt_level == 1)) {

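		/*
		 * There is saved interrupt state and it is not merely
		 * the user-mode frame of a level-1 interrupt taken from
		 * user space (in which case cpu_int_state aliases
		 * USER_STATE and there are no kernel-mode registers to
		 * report).
		 */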
		return current_cpu_datap()->cpu_int_state;
	} else {
		return NULL;
	}
}

vm_offset_t dtrace_get_cpu_int_stack_top(void);

vm_offset_t
dtrace_get_cpu_int_stack_top(void)
{
	return current_cpu_datap()->cpu_int_stack_top;
}
#endif
