1/*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#ifdef	MACH_BSD
29#include <mach_rt.h>
30#include <mach_debug.h>
31#include <mach_ldebug.h>
32
33#include <mach/kern_return.h>
34#include <mach/mach_traps.h>
35#include <mach/thread_status.h>
36#include <mach/vm_param.h>
37
38#include <kern/counters.h>
39#include <kern/cpu_data.h>
40#include <kern/mach_param.h>
41#include <kern/task.h>
42#include <kern/thread.h>
43#include <kern/sched_prim.h>
44#include <kern/misc_protos.h>
45#include <kern/assert.h>
46#include <kern/debug.h>
47#include <kern/spl.h>
48#include <kern/syscall_sw.h>
49#include <ipc/ipc_port.h>
50#include <vm/vm_kern.h>
51#include <vm/pmap.h>
52
53#include <i386/cpu_number.h>
54#include <i386/eflags.h>
55#include <i386/proc_reg.h>
56#include <i386/tss.h>
57#include <i386/user_ldt.h>
58#include <i386/fpu.h>
59#include <i386/machdep_call.h>
60#include <i386/vmparam.h>
61#include <i386/mp_desc.h>
62#include <i386/misc_protos.h>
63#include <i386/thread.h>
64#include <i386/trap.h>
65#include <i386/seg.h>
66#include <mach/i386/syscall_sw.h>
67#include <sys/syscall.h>
68#include <sys/kdebug.h>
69#include <sys/errno.h>
70#include <../bsd/sys/sysent.h>
71
72#ifdef MACH_BSD
73extern void	mach_kauth_cred_uthread_update(void);
74extern void throttle_lowpri_io(int);
75#endif
76
77void * find_user_regs(thread_t);
78
79unsigned int get_msr_exportmask(void);
80
81unsigned int get_msr_nbits(void);
82
83unsigned int get_msr_rbits(void);
84
85/*
86 * thread_userstack:
87 *
88 * Return the user stack pointer from the machine
89 * dependent thread state info.
90 */
91kern_return_t
92thread_userstack(
93    __unused thread_t   thread,
94    int                 flavor,
95    thread_state_t      tstate,
96    __unused unsigned int        count,
97    mach_vm_offset_t    *user_stack,
98	int					*customstack
99)
100{
101	if (customstack)
102		*customstack = 0;
103
104	switch (flavor) {
105	case x86_THREAD_STATE32:
106		{
107			x86_thread_state32_t *state25;
108
109			state25 = (x86_thread_state32_t *) tstate;
110
111			if (state25->esp) {
112				*user_stack = state25->esp;
113				if (customstack)
114					*customstack = 1;
115			} else {
116				*user_stack = VM_USRSTACK32;
117				if (customstack)
118					*customstack = 0;
119			}
120			break;
121		}
122
123	case x86_THREAD_STATE64:
124		{
125			x86_thread_state64_t *state25;
126
127			state25 = (x86_thread_state64_t *) tstate;
128
129			if (state25->rsp) {
130				*user_stack = state25->rsp;
131				if (customstack)
132					*customstack = 1;
133			} else {
134				*user_stack = VM_USRSTACK64;
135				if (customstack)
136					*customstack = 0;
137			}
138			break;
139		}
140
141	default:
142		return (KERN_INVALID_ARGUMENT);
143	}
144
145	return (KERN_SUCCESS);
146}
147
148/*
149 * thread_userstackdefault:
150 *
151 * Return the default stack location for the
152 * thread, if otherwise unknown.
153 */
154kern_return_t
155thread_userstackdefault(
156	thread_t thread,
157	mach_vm_offset_t *default_user_stack)
158{
159	if (thread_is_64bit(thread)) {
160		*default_user_stack = VM_USRSTACK64;
161	} else {
162		*default_user_stack = VM_USRSTACK32;
163	}
164	return (KERN_SUCCESS);
165}
166
167kern_return_t
168thread_entrypoint(
169    __unused thread_t   thread,
170    int                 flavor,
171    thread_state_t      tstate,
172    __unused unsigned int        count,
173    mach_vm_offset_t    *entry_point
174)
175{
176	/*
177	 * Set a default.
178	 */
179	if (*entry_point == 0)
180		*entry_point = VM_MIN_ADDRESS;
181
182	switch (flavor) {
183	case x86_THREAD_STATE32:
184		{
185			x86_thread_state32_t *state25;
186
187			state25 = (i386_thread_state_t *) tstate;
188			*entry_point = state25->eip ? state25->eip: VM_MIN_ADDRESS;
189			break;
190		}
191
192	case x86_THREAD_STATE64:
193		{
194			x86_thread_state64_t *state25;
195
196			state25 = (x86_thread_state64_t *) tstate;
197			*entry_point = state25->rip ? state25->rip: VM_MIN_ADDRESS64;
198			break;
199		}
200	}
201	return (KERN_SUCCESS);
202}
203
204/*
205 * FIXME - thread_set_child
206 */
207
208void thread_set_child(thread_t child, int pid);
209void
210thread_set_child(thread_t child, int pid)
211{
212	pal_register_cache_state(child, DIRTY);
213
214	if (thread_is_64bit(child)) {
215		x86_saved_state64_t	*iss64;
216
217		iss64 = USER_REGS64(child);
218
219		iss64->rax = pid;
220		iss64->rdx = 1;
221		iss64->isf.rflags &= ~EFL_CF;
222	} else {
223		x86_saved_state32_t	*iss32;
224
225		iss32 = USER_REGS32(child);
226
227		iss32->eax = pid;
228		iss32->edx = 1;
229		iss32->efl &= ~EFL_CF;
230	}
231}
232
233
234
235/*
236 * System Call handling code
237 */
238
239extern long fuword(vm_offset_t);
240
241
242
243void
244machdep_syscall(x86_saved_state_t *state)
245{
246	int			args[machdep_call_count];
247	int			trapno;
248	int			nargs;
249	const machdep_call_t	*entry;
250	x86_saved_state32_t	*regs;
251
252	assert(is_saved_state32(state));
253	regs = saved_state32(state);
254
255	trapno = regs->eax;
256#if DEBUG_TRACE
257	kprintf("machdep_syscall(0x%08x) code=%d\n", regs, trapno);
258#endif
259
260	DEBUG_KPRINT_SYSCALL_MDEP(
261		"machdep_syscall: trapno=%d\n", trapno);
262
263	if (trapno < 0 || trapno >= machdep_call_count) {
264		regs->eax = (unsigned int)kern_invalid(NULL);
265
266		thread_exception_return();
267		/* NOTREACHED */
268	}
269	entry = &machdep_call_table[trapno];
270	nargs = entry->nargs;
271
272	if (nargs != 0) {
273		if (copyin((user_addr_t) regs->uesp + sizeof (int),
274				(char *) args, (nargs * sizeof (int)))) {
275			regs->eax = KERN_INVALID_ADDRESS;
276
277			thread_exception_return();
278			/* NOTREACHED */
279		}
280	}
281	switch (nargs) {
282	case 0:
283		regs->eax = (*entry->routine.args_0)();
284		break;
285	case 1:
286		regs->eax = (*entry->routine.args_1)(args[0]);
287		break;
288	case 2:
289		regs->eax = (*entry->routine.args_2)(args[0],args[1]);
290		break;
291	case 3:
292		if (!entry->bsd_style)
293			regs->eax = (*entry->routine.args_3)(args[0],args[1],args[2]);
294		else {
295			int	error;
296			uint32_t	rval;
297
298			error = (*entry->routine.args_bsd_3)(&rval, args[0], args[1], args[2]);
299			if (error) {
300				regs->eax = error;
301				regs->efl |= EFL_CF;	/* carry bit */
302			} else {
303				regs->eax = rval;
304				regs->efl &= ~EFL_CF;
305			}
306		}
307		break;
308	case 4:
309		regs->eax = (*entry->routine.args_4)(args[0], args[1], args[2], args[3]);
310		break;
311
312	default:
313		panic("machdep_syscall: too many args");
314	}
315	if (current_thread()->funnel_lock)
316		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);
317
318	DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs->eax);
319
320	throttle_lowpri_io(1);
321
322	thread_exception_return();
323	/* NOTREACHED */
324}
325
326
327void
328machdep_syscall64(x86_saved_state_t *state)
329{
330	int			trapno;
331	const machdep_call_t	*entry;
332	x86_saved_state64_t	*regs;
333
334	assert(is_saved_state64(state));
335	regs = saved_state64(state);
336
337	trapno = (int)(regs->rax & SYSCALL_NUMBER_MASK);
338
339	DEBUG_KPRINT_SYSCALL_MDEP(
340		"machdep_syscall64: trapno=%d\n", trapno);
341
342	if (trapno < 0 || trapno >= machdep_call_count) {
343		regs->rax = (unsigned int)kern_invalid(NULL);
344
345		thread_exception_return();
346		/* NOTREACHED */
347	}
348	entry = &machdep_call_table64[trapno];
349
350	switch (entry->nargs) {
351	case 0:
352		regs->rax = (*entry->routine.args_0)();
353		break;
354	case 1:
355		regs->rax = (*entry->routine.args64_1)(regs->rdi);
356		break;
357	default:
358		panic("machdep_syscall64: too many args");
359	}
360	if (current_thread()->funnel_lock)
361		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);
362
363	DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%llu\n", regs->rax);
364
365	throttle_lowpri_io(1);
366
367	thread_exception_return();
368	/* NOTREACHED */
369}
370
371#endif	/* MACH_BSD */
372
373
/*
 * Signature through which this file invokes a Mach trap handler:
 * the handler receives a single pointer to its argument block and
 * returns a kern_return_t.
 */
typedef kern_return_t (*mach_call_t)(void *);

/*
 * Argument block handed to a Mach trap handler on the 32-bit path.
 * mach_call_munger() zero-initialises one of these, has the 32-bit
 * munger fill it from the user stack, and passes its address to the
 * handler.  There is room for nine arguments.
 */
struct mach_call_args {
	syscall_arg_t arg1;
	syscall_arg_t arg2;
	syscall_arg_t arg3;
	syscall_arg_t arg4;
	syscall_arg_t arg5;
	syscall_arg_t arg6;
	syscall_arg_t arg7;
	syscall_arg_t arg8;
	syscall_arg_t arg9;
};
387
388static kern_return_t
389mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap_t *trapp);
390
391
392static kern_return_t
393mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap_t *trapp)
394{
395	if (copyin((user_addr_t)(sp + sizeof(int)), (char *)args, trapp->mach_trap_u32_words * sizeof (int)))
396		return KERN_INVALID_ARGUMENT;
397	trapp->mach_trap_arg_munge32(NULL, args);
398	return KERN_SUCCESS;
399}
400
401
402__private_extern__ void mach_call_munger(x86_saved_state_t *state);
403
404extern const char *mach_syscall_name_table[];
405
406void
407mach_call_munger(x86_saved_state_t *state)
408{
409	int argc;
410	int call_number;
411	mach_call_t mach_call;
412	kern_return_t retval;
413	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
414	x86_saved_state32_t	*regs;
415
416	assert(is_saved_state32(state));
417	regs = saved_state32(state);
418
419	call_number = -(regs->eax);
420
421	DEBUG_KPRINT_SYSCALL_MACH(
422		"mach_call_munger: code=%d(%s)\n",
423		call_number, mach_syscall_name_table[call_number]);
424#if DEBUG_TRACE
425	kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number);
426#endif
427
428	if (call_number < 0 || call_number >= mach_trap_count) {
429		i386_exception(EXC_SYSCALL, call_number, 1);
430		/* NOTREACHED */
431	}
432	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;
433
434	if (mach_call == (mach_call_t)kern_invalid) {
435		DEBUG_KPRINT_SYSCALL_MACH(
436			"mach_call_munger: kern_invalid 0x%x\n", regs->eax);
437		i386_exception(EXC_SYSCALL, call_number, 1);
438		/* NOTREACHED */
439	}
440
441	argc = mach_trap_table[call_number].mach_trap_arg_count;
442	if (argc) {
443		retval = mach_call_arg_munger32(regs->uesp, &args,  &mach_trap_table[call_number]);
444		if (retval != KERN_SUCCESS) {
445			regs->eax = retval;
446
447			DEBUG_KPRINT_SYSCALL_MACH(
448				"mach_call_munger: retval=0x%x\n", retval);
449
450			thread_exception_return();
451			/* NOTREACHED */
452		}
453	}
454
455#ifdef MACH_BSD
456	mach_kauth_cred_uthread_update();
457#endif
458
459	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
460		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
461		args.arg1, args.arg2, args.arg3, args.arg4, 0);
462
463	retval = mach_call(&args);
464
465	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);
466
467	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
468		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
469		retval, 0, 0, 0, 0);
470
471	regs->eax = retval;
472
473	throttle_lowpri_io(1);
474
475	thread_exception_return();
476	/* NOTREACHED */
477}
478
479
480__private_extern__ void mach_call_munger64(x86_saved_state_t *regs);
481
482void
483mach_call_munger64(x86_saved_state_t *state)
484{
485	int call_number;
486	int argc;
487	mach_call_t mach_call;
488	x86_saved_state64_t	*regs;
489
490	assert(is_saved_state64(state));
491	regs = saved_state64(state);
492
493	call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK);
494
495	DEBUG_KPRINT_SYSCALL_MACH(
496		"mach_call_munger64: code=%d(%s)\n",
497		call_number, mach_syscall_name_table[call_number]);
498
499	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
500		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_START,
501		regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);
502
503	if (call_number < 0 || call_number >= mach_trap_count) {
504	        i386_exception(EXC_SYSCALL, regs->rax, 1);
505		/* NOTREACHED */
506	}
507	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;
508
509	if (mach_call == (mach_call_t)kern_invalid) {
510	        i386_exception(EXC_SYSCALL, regs->rax, 1);
511		/* NOTREACHED */
512	}
513	argc = mach_trap_table[call_number].mach_trap_arg_count;
514
515	if (argc > 6) {
516	        int copyin_count;
517
518		copyin_count = (argc - 6) * (int)sizeof(uint64_t);
519
520	        if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count)) {
521		        regs->rax = KERN_INVALID_ARGUMENT;
522
523			thread_exception_return();
524			/* NOTREACHED */
525		}
526	}
527
528#ifdef MACH_BSD
529	mach_kauth_cred_uthread_update();
530#endif
531
532	regs->rax = (uint64_t)mach_call((void *)(&regs->rdi));
533
534	DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax);
535
536	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
537		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
538		regs->rax, 0, 0, 0, 0);
539
540	throttle_lowpri_io(1);
541
542	thread_exception_return();
543	/* NOTREACHED */
544}
545
546
547/*
548 * thread_setuserstack:
549 *
550 * Sets the user stack pointer into the machine
551 * dependent thread state info.
552 */
553void
554thread_setuserstack(
555	thread_t	thread,
556	mach_vm_address_t	user_stack)
557{
558	pal_register_cache_state(thread, DIRTY);
559	if (thread_is_64bit(thread)) {
560		x86_saved_state64_t	*iss64;
561
562		iss64 = USER_REGS64(thread);
563
564		iss64->isf.rsp = (uint64_t)user_stack;
565	} else {
566		x86_saved_state32_t	*iss32;
567
568		iss32 = USER_REGS32(thread);
569
570		iss32->uesp = CAST_DOWN_EXPLICIT(unsigned int, user_stack);
571	}
572}
573
574/*
575 * thread_adjuserstack:
576 *
577 * Returns the adjusted user stack pointer from the machine
578 * dependent thread state info.  Used for small (<2G) deltas.
579 */
580uint64_t
581thread_adjuserstack(
582	thread_t	thread,
583	int		adjust)
584{
585	pal_register_cache_state(thread, DIRTY);
586	if (thread_is_64bit(thread)) {
587		x86_saved_state64_t	*iss64;
588
589		iss64 = USER_REGS64(thread);
590
591		iss64->isf.rsp += adjust;
592
593		return iss64->isf.rsp;
594	} else {
595		x86_saved_state32_t	*iss32;
596
597		iss32 = USER_REGS32(thread);
598
599		iss32->uesp += adjust;
600
601		return CAST_USER_ADDR_T(iss32->uesp);
602	}
603}
604
605/*
606 * thread_setentrypoint:
607 *
608 * Sets the user PC into the machine
609 * dependent thread state info.
610 */
611void
612thread_setentrypoint(thread_t thread, mach_vm_address_t entry)
613{
614	pal_register_cache_state(thread, DIRTY);
615	if (thread_is_64bit(thread)) {
616		x86_saved_state64_t	*iss64;
617
618		iss64 = USER_REGS64(thread);
619
620		iss64->isf.rip = (uint64_t)entry;
621	} else {
622		x86_saved_state32_t	*iss32;
623
624		iss32 = USER_REGS32(thread);
625
626		iss32->eip = CAST_DOWN_EXPLICIT(unsigned int, entry);
627	}
628}
629
630
631kern_return_t
632thread_setsinglestep(thread_t thread, int on)
633{
634	pal_register_cache_state(thread, DIRTY);
635	if (thread_is_64bit(thread)) {
636		x86_saved_state64_t	*iss64;
637
638		iss64 = USER_REGS64(thread);
639
640		if (on)
641			iss64->isf.rflags |= EFL_TF;
642		else
643			iss64->isf.rflags &= ~EFL_TF;
644	} else {
645		x86_saved_state32_t	*iss32;
646
647		iss32 = USER_REGS32(thread);
648
649		if (on) {
650			iss32->efl |= EFL_TF;
651			/* Ensure IRET */
652			if (iss32->cs == SYSENTER_CS)
653				iss32->cs = SYSENTER_TF_CS;
654		}
655		else
656			iss32->efl &= ~EFL_TF;
657	}
658
659	return (KERN_SUCCESS);
660}
661
662
663
664/* XXX this should be a struct savearea so that CHUD will work better on x86 */
665void *
666find_user_regs(thread_t thread)
667{
668	pal_register_cache_state(thread, DIRTY);
669	return USER_STATE(thread);
670}
671
672void *
673get_user_regs(thread_t th)
674{
675	pal_register_cache_state(th, DIRTY);
676	return(USER_STATE(th));
677}
678
679#if CONFIG_DTRACE
680/*
681 * DTrace would like to have a peek at the kernel interrupt state, if available.
682 * Based on osfmk/chud/i386/chud_thread_i386.c:chudxnu_thread_get_state(), which see.
683 */
684x86_saved_state_t *find_kern_regs(thread_t);
685
686x86_saved_state_t *
687find_kern_regs(thread_t thread)
688{
689	if (thread == current_thread() &&
690		NULL != current_cpu_datap()->cpu_int_state &&
691		!(USER_STATE(thread) == current_cpu_datap()->cpu_int_state &&
692		  current_cpu_datap()->cpu_interrupt_level == 1)) {
693
694		return current_cpu_datap()->cpu_int_state;
695	} else {
696		return NULL;
697	}
698}
699
700vm_offset_t dtrace_get_cpu_int_stack_top(void);
701
702vm_offset_t
703dtrace_get_cpu_int_stack_top(void)
704{
705	return current_cpu_datap()->cpu_int_stack_top;
706}
707#endif
708