1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#include <kern/task.h>
29#include <kern/thread.h>
30#include <kern/assert.h>
31#include <kern/clock.h>
32#include <kern/locks.h>
33#include <kern/sched_prim.h>
34#include <mach/machine/thread_status.h>
35#include <mach/thread_act.h>
36
37#include <sys/kernel.h>
38#include <sys/vm.h>
39#include <sys/proc_internal.h>
40#include <sys/syscall.h>
41#include <sys/systm.h>
42#include <sys/user.h>
43#include <sys/errno.h>
44#include <sys/kdebug.h>
45#include <sys/sysent.h>
46#include <sys/sysproto.h>
47#include <sys/kauth.h>
48#include <sys/systm.h>
49
50#include <bsm/audit_kernel.h>
51
52#include <i386/seg.h>
53#include <i386/machine_routines.h>
54#include <mach/i386/syscall_sw.h>
55
56#if CONFIG_DTRACE
57extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
58extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
59#endif
60
61extern void unix_syscall(x86_saved_state_t *);
62extern void unix_syscall64(x86_saved_state_t *);
63extern void *find_user_regs(thread_t);
64
65extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
66extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
67/*
68 * Function:	unix_syscall
69 *
70 * Inputs:	regs	- pointer to i386 save area
71 *
72 * Outputs:	none
73 */
74void
75unix_syscall(x86_saved_state_t *state)
76{
77	thread_t		thread;
78	void			*vt;
79	unsigned int		code;
80	struct sysent		*callp;
81
82	int			error;
83	vm_offset_t		params;
84	struct proc		*p;
85	struct uthread		*uthread;
86	x86_saved_state32_t	*regs;
87	boolean_t		args_in_uthread;
88
89	assert(is_saved_state32(state));
90	regs = saved_state32(state);
91#if DEBUG
92	if (regs->eax == 0x800)
93		thread_exception_return();
94#endif
95	thread = current_thread();
96	uthread = get_bsdthread_info(thread);
97
98	/* Get the approriate proc; may be different from task's for vfork() */
99	if (!(uthread->uu_flag & UT_VFORK))
100		p = (struct proc *)get_bsdtask_info(current_task());
101	else
102		p = current_proc();
103
104	/* Verify that we are not being called from a task without a proc */
105	if (p == NULL) {
106		regs->eax = EPERM;
107		regs->efl |= EFL_CF;
108		task_terminate_internal(current_task());
109		thread_exception_return();
110		/* NOTREACHED */
111	}
112
113	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
114	args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread);
115	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
116
117	regs->efl &= ~(EFL_CF);
118
119	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
120
121	if (callp == sysent) {
122		code = fuword(params);
123		params += sizeof(int);
124		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
125	}
126
127	vt = (void *)uthread->uu_arg;
128
129	if (callp->sy_arg_bytes != 0) {
130		sy_munge_t	*mungerp;
131
132		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
133		if (!args_in_uthread)
134		{
135			uint32_t nargs;
136			nargs = callp->sy_arg_bytes;
137			error = copyin((user_addr_t) params, (char *) vt, nargs);
138			if (error) {
139				regs->eax = error;
140				regs->efl |= EFL_CF;
141				thread_exception_return();
142				/* NOTREACHED */
143			}
144		}
145
146		if (code != 180) {
147	        	int *ip = (int *)vt;
148
149			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
150				      *ip, *(ip+1), *(ip+2), *(ip+3), 0);
151		}
152		mungerp = callp->sy_arg_munge32;
153
154		/*
155		 * If non-NULL, then call the syscall argument munger to
156		 * copy in arguments (see xnu/bsd/dev/i386/munge.s); the
157		 * first argument is NULL because we are munging in place
158		 * after a copyin because the ABI currently doesn't use
159		 * registers to pass system call arguments.
160		 */
161		if (mungerp != NULL)
162			(*mungerp)(NULL, vt);
163	} else
164		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
165			0, 0, 0, 0, 0);
166
167	/*
168	 * Delayed binding of thread credential to process credential, if we
169	 * are not running with an explicitly set thread credential.
170	 */
171	kauth_cred_uthread_update(uthread, p);
172
173	uthread->uu_rval[0] = 0;
174	uthread->uu_rval[1] = regs->edx;
175	uthread->uu_flag |= UT_NOTCANCELPT;
176
177
178#ifdef JOE_DEBUG
179        uthread->uu_iocount = 0;
180        uthread->uu_vpindex = 0;
181#endif
182
183	AUDIT_SYSCALL_ENTER(code, p, uthread);
184	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
185        AUDIT_SYSCALL_EXIT(code, p, uthread, error);
186
187#ifdef JOE_DEBUG
188        if (uthread->uu_iocount)
189                joe_debug("system call returned with uu_iocount != 0");
190#endif
191#if CONFIG_DTRACE
192	uthread->t_dtrace_errno = error;
193#endif /* CONFIG_DTRACE */
194
195	if (error == ERESTART) {
196		/*
197		 * Move the user's pc back to repeat the syscall:
198		 * 5 bytes for a sysenter, or 2 for an int 8x.
199		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
200		 * - see debug trap handler in idt.s/idt64.s
201		 */
202		if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) {
203			regs->eip -= 5;
204		}
205		else
206			regs->eip -= 2;
207	}
208	else if (error != EJUSTRETURN) {
209		if (error) {
210		    regs->eax = error;
211		    regs->efl |= EFL_CF;	/* carry bit */
212		} else { /* (not error) */
213		    regs->eax = uthread->uu_rval[0];
214		    regs->edx = uthread->uu_rval[1];
215		}
216	}
217
218	uthread->uu_flag &= ~UT_NOTCANCELPT;
219#if DEBUG
220	/*
221	 * if we're holding the funnel panic
222	 */
223	syscall_exit_funnelcheck();
224#endif /* DEBUG */
225	if (uthread->uu_lowpri_window) {
226	        /*
227		 * task is marked as a low priority I/O type
228		 * and the I/O we issued while in this system call
229		 * collided with normal I/O operations... we'll
230		 * delay in order to mitigate the impact of this
231		 * task on the normal operation of the system
232		 */
233		throttle_lowpri_io(TRUE);
234	}
235	if (code != 180)
236	        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
237				      error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
238
239	thread_exception_return();
240	/* NOTREACHED */
241}
242
243
244void
245unix_syscall64(x86_saved_state_t *state)
246{
247	thread_t	thread;
248	unsigned int	code;
249	struct sysent	*callp;
250	void		*uargp;
251	int		args_in_regs;
252	int		error;
253	struct proc	*p;
254	struct uthread	*uthread;
255	x86_saved_state64_t *regs;
256
257	assert(is_saved_state64(state));
258	regs = saved_state64(state);
259
260	if (regs->rax == 0x2000800)
261		thread_exception_return();
262
263	thread = current_thread();
264	uthread = get_bsdthread_info(thread);
265
266	/* Get the approriate proc; may be different from task's for vfork() */
267	if (!(uthread->uu_flag & UT_VFORK))
268		p = (struct proc *)get_bsdtask_info(current_task());
269	else
270		p = current_proc();
271
272	/* Verify that we are not being called from a task without a proc */
273	if (p == NULL) {
274		regs->rax = EPERM;
275		regs->isf.rflags |= EFL_CF;
276		task_terminate_internal(current_task());
277		thread_exception_return();
278		/* NOTREACHED */
279	}
280	args_in_regs = 6;
281
282	code = regs->rax & SYSCALL_NUMBER_MASK;
283	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
284	uargp = (void *)(&regs->rdi);
285
286	if (callp == sysent) {
287	        /*
288		 * indirect system call... system call number
289		 * passed as 'arg0'
290		 */
291	        code = regs->rdi;
292		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
293		uargp = (void *)(&regs->rsi);
294		args_in_regs = 5;
295	}
296
297	if (callp->sy_narg != 0) {
298		if (code != 180) {
299			uint64_t *ip = (uint64_t *)uargp;
300
301			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
302					(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
303		}
304		assert(callp->sy_narg <= 8);
305
306		if (callp->sy_narg > args_in_regs) {
307			int copyin_count;
308
309			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
310
311			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count);
312			if (error) {
313				regs->rax = error;
314				regs->isf.rflags |= EFL_CF;
315				thread_exception_return();
316				/* NOTREACHED */
317			}
318		}
319		/*
320		 * XXX Turn 64 bit unsafe calls into nosys()
321		 */
322		if (callp->sy_flags & UNSAFE_64BIT) {
323			callp = &sysent[63];
324			goto unsafe;
325		}
326	} else
327	        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
328				      0, 0, 0, 0, 0);
329unsafe:
330
331	/*
332	 * Delayed binding of thread credential to process credential, if we
333	 * are not running with an explicitly set thread credential.
334	 */
335	kauth_cred_uthread_update(uthread, p);
336
337	uthread->uu_rval[0] = 0;
338	uthread->uu_rval[1] = 0;
339
340
341	uthread->uu_flag |= UT_NOTCANCELPT;
342
343
344	AUDIT_SYSCALL_ENTER(code, p, uthread);
345	error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
346        AUDIT_SYSCALL_EXIT(code, p, uthread, error);
347
348#if CONFIG_DTRACE
349	uthread->t_dtrace_errno = error;
350#endif /* CONFIG_DTRACE */
351
352	if (error == ERESTART) {
353		/*
354		 * all system calls come through via the syscall instruction
355		 * in 64 bit mode... its 2 bytes in length
356		 * move the user's pc back to repeat the syscall:
357		 */
358	        regs->isf.rip -= 2;
359	}
360	else if (error != EJUSTRETURN) {
361		if (error) {
362			regs->rax = error;
363			regs->isf.rflags |= EFL_CF;	/* carry bit */
364		} else { /* (not error) */
365
366			switch (callp->sy_return_type) {
367			case _SYSCALL_RET_INT_T:
368				regs->rax = uthread->uu_rval[0];
369				regs->rdx = uthread->uu_rval[1];
370				break;
371			case _SYSCALL_RET_UINT_T:
372				regs->rax = ((u_int)uthread->uu_rval[0]);
373				regs->rdx = ((u_int)uthread->uu_rval[1]);
374				break;
375			case _SYSCALL_RET_OFF_T:
376			case _SYSCALL_RET_ADDR_T:
377			case _SYSCALL_RET_SIZE_T:
378			case _SYSCALL_RET_SSIZE_T:
379			        regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
380				regs->rdx = 0;
381				break;
382			case _SYSCALL_RET_NONE:
383				break;
384			default:
385				panic("unix_syscall: unknown return type");
386				break;
387			}
388			regs->isf.rflags &= ~EFL_CF;
389		}
390	}
391
392
393	uthread->uu_flag &= ~UT_NOTCANCELPT;
394
395	/*
396	 * if we're holding the funnel panic
397	 */
398	syscall_exit_funnelcheck();
399
400	if (uthread->uu_lowpri_window) {
401	        /*
402		 * task is marked as a low priority I/O type
403		 * and the I/O we issued while in this system call
404		 * collided with normal I/O operations... we'll
405		 * delay in order to mitigate the impact of this
406		 * task on the normal operation of the system
407		 */
408		throttle_lowpri_io(TRUE);
409	}
410	if (code != 180)
411	        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
412				      error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
413
414	thread_exception_return();
415	/* NOTREACHED */
416}
417
418
419void
420unix_syscall_return(int error)
421{
422	thread_t		thread;
423	struct uthread		*uthread;
424	struct proc *p;
425	unsigned int code;
426	vm_offset_t params;
427	struct sysent *callp;
428
429	thread = current_thread();
430	uthread = get_bsdthread_info(thread);
431
432	p = current_proc();
433
434	if (proc_is64bit(p)) {
435		x86_saved_state64_t *regs;
436
437		regs = saved_state64(find_user_regs(thread));
438
439		/* reconstruct code for tracing before blasting rax */
440		code = regs->rax & SYSCALL_NUMBER_MASK;
441		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
442
443		if (callp == sysent)
444			/*
445			 * indirect system call... system call number
446			 * passed as 'arg0'
447			 */
448			code = regs->rdi;
449
450#if CONFIG_DTRACE
451		if (callp->sy_call == dtrace_systrace_syscall)
452			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
453#endif /* CONFIG_DTRACE */
454
455		if (error == ERESTART) {
456			/*
457			 * all system calls come through via the syscall instruction
458			 * in 64 bit mode... its 2 bytes in length
459			 * move the user's pc back to repeat the syscall:
460			 */
461			regs->isf.rip -= 2;
462		}
463		else if (error != EJUSTRETURN) {
464			if (error) {
465				regs->rax = error;
466				regs->isf.rflags |= EFL_CF;	/* carry bit */
467			} else { /* (not error) */
468
469				switch (callp->sy_return_type) {
470				case _SYSCALL_RET_INT_T:
471					regs->rax = uthread->uu_rval[0];
472					regs->rdx = uthread->uu_rval[1];
473					break;
474				case _SYSCALL_RET_UINT_T:
475					regs->rax = ((u_int)uthread->uu_rval[0]);
476					regs->rdx = ((u_int)uthread->uu_rval[1]);
477					break;
478				case _SYSCALL_RET_OFF_T:
479				case _SYSCALL_RET_ADDR_T:
480				case _SYSCALL_RET_SIZE_T:
481				case _SYSCALL_RET_SSIZE_T:
482					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
483					regs->rdx = 0;
484					break;
485				case _SYSCALL_RET_NONE:
486					break;
487				default:
488					panic("unix_syscall: unknown return type");
489					break;
490				}
491				regs->isf.rflags &= ~EFL_CF;
492			}
493		}
494	} else {
495		x86_saved_state32_t	*regs;
496
497		regs = saved_state32(find_user_regs(thread));
498
499		regs->efl &= ~(EFL_CF);
500		/* reconstruct code for tracing before blasting eax */
501		code = regs->eax & I386_SYSCALL_NUMBER_MASK;
502		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
503
504#if CONFIG_DTRACE
505		if (callp->sy_call == dtrace_systrace_syscall)
506			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
507#endif /* CONFIG_DTRACE */
508
509		if (callp == sysent) {
510			params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
511			code = fuword(params);
512		}
513		if (error == ERESTART) {
514			regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2;
515		}
516		else if (error != EJUSTRETURN) {
517			if (error) {
518				regs->eax = error;
519				regs->efl |= EFL_CF;	/* carry bit */
520			} else { /* (not error) */
521				regs->eax = uthread->uu_rval[0];
522				regs->edx = uthread->uu_rval[1];
523			}
524		}
525	}
526
527
528	uthread->uu_flag &= ~UT_NOTCANCELPT;
529
530	/*
531	 * if we're holding the funnel panic
532	 */
533	syscall_exit_funnelcheck();
534
535	if (uthread->uu_lowpri_window) {
536	        /*
537		 * task is marked as a low priority I/O type
538		 * and the I/O we issued while in this system call
539		 * collided with normal I/O operations... we'll
540		 * delay in order to mitigate the impact of this
541		 * task on the normal operation of the system
542		 */
543		throttle_lowpri_io(TRUE);
544	}
545	if (code != 180)
546	        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
547				      error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
548
549	thread_exception_return();
550	/* NOTREACHED */
551}
552
/*
 * munge_wwwlww
 *
 * Widen, in place, a packed 32-bit argument list laid out as
 * word, word, word, long, word, word (seven 32-bit slots) into six
 * 64-bit slots.  Each word is zero-extended; the two halves of the long
 * are moved, in the same order, into the fourth 64-bit slot.
 *
 * in32 is unused: out64 is both the source and the destination and has
 * to be large enough for six uint64_t values.
 */
void
munge_wwwlww(
	__unused const void	*in32,
	void			*out64)
{
	uint32_t	*packed = (uint32_t *) out64;	/* 32-bit view of the buffer */
	uint64_t	*wide = (uint64_t *) out64;	/* 64-bit view of the buffer */

	/* read every packed input before a widened store can land on it */
	const uint32_t	w0 = packed[0];
	const uint32_t	w1 = packed[1];
	const uint32_t	w2 = packed[2];
	const uint32_t	long_lo = packed[3];
	const uint32_t	long_hi = packed[4];
	const uint32_t	w4 = packed[5];
	const uint32_t	w5 = packed[6];

	wide[0] = w0;		/* Wwwlww */
	wide[1] = w1;		/* wWwlww */
	wide[2] = w2;		/* wwWlww */
	packed[6] = long_lo;	/* wwwLww (lo) */
	packed[7] = long_hi;	/* wwwLww (hi) */
	wide[4] = w4;		/* wwwlWw */
	wide[5] = w5;		/* wwwlwW */
}
573
574
/*
 * munge_wwlwww
 *
 * Widen, in place, a packed 32-bit argument list laid out as
 * word, word, long, word, word, word (seven 32-bit slots) into six
 * 64-bit slots.  Each word is zero-extended; the two halves of the long
 * are moved, in the same order, into the third 64-bit slot.
 *
 * in32 is unused: out64 is both the source and the destination and has
 * to be large enough for six uint64_t values.
 */
void
munge_wwlwww(
	__unused const void	*in32,
	void			*out64)
{
	uint32_t	*packed = (uint32_t *) out64;	/* 32-bit view of the buffer */
	uint64_t	*wide = (uint64_t *) out64;	/* 64-bit view of the buffer */

	/* read every packed input before a widened store can land on it */
	const uint32_t	w0 = packed[0];
	const uint32_t	w1 = packed[1];
	const uint32_t	long_lo = packed[2];
	const uint32_t	long_hi = packed[3];
	const uint32_t	w3 = packed[4];
	const uint32_t	w4 = packed[5];
	const uint32_t	w5 = packed[6];

	wide[0] = w0;		/* Wwlwww */
	wide[1] = w1;		/* wWlwww */
	packed[4] = long_lo;	/* wwLwww (lo) */
	packed[5] = long_hi;	/* wwLwww (hi) */
	wide[3] = w3;		/* wwlWww */
	wide[4] = w4;		/* wwlwWw */
	wide[5] = w5;		/* wwlwwW */
}
595
596#ifdef JOE_DEBUG
/*
 * JOE_DEBUG helper: print the message p followed by a newline.
 *
 * The return type is spelled out as int (the original relied on implicit
 * int), which stays compatible with callers that use the function before
 * this definition is seen.  Always returns 0.
 */
int
joe_debug(char *p)
{
        printf("%s\n", p);
        return 0;
}
601#endif
602
603
604