linux_sysvec.c revision 85599
1/*-
 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software withough specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 85599 2001-10-27 11:15:19Z des $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/imgact.h>
41#include <sys/imgact_aout.h>
42#include <sys/imgact_elf.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/signalvar.h>
48#include <sys/sysent.h>
49#include <sys/sysproto.h>
50
51#include <vm/vm.h>
52#include <vm/vm_param.h>
53#include <vm/vm_page.h>
54#include <vm/vm_extern.h>
55#include <sys/exec.h>
56#include <sys/kernel.h>
57#include <sys/module.h>
58#include <machine/cpu.h>
59#include <sys/mutex.h>
60
61#include <i386/linux/linux.h>
62#include <i386/linux/linux_proto.h>
63#include <compat/linux/linux_signal.h>
64#include <compat/linux/linux_util.h>
65
/*
 * Module metadata: declare the "linux" module (version 1) and its
 * dependencies on the sysvmsg, sysvsem and sysvshm modules.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Malloc type M_LINUX; it is defined here but not used in this file. */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72
/*
 * The two characters "#!" as they appear when the start of an image
 * header is read as a single short; exec_linux_imgact_try() compares
 * the header against this value to recognise shell scripts.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321 /* #! with the byte order swapped */
#endif
78
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves.  Both names are
 * mapped to syscall 0, which is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
87
/* Signal trampoline code and its size; both are wired into the sysentvecs. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, LINUX_SYS_MAXSYSCALL entries. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Set of ioctl handlers (un)registered by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
94
95static int	linux_fixup __P((register_t **stack_base,
96				 struct image_params *iparams));
97static int	elf_linux_fixup __P((register_t **stack_base,
98				     struct image_params *iparams));
99static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
100				       u_int *code, caddr_t *params));
101static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
102				   u_long code));
103
104/*
105 * Linux syscalls return negative errno's, we do positive and map them
106 */
107static int bsd_to_linux_errno[ELAST + 1] = {
108  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
109 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
110 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
111 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
112 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
113	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
114	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
115	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
116  	-6, -6, -43, -42, -75, -6, -84
117};
118
119int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
120	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
121	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
122	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
123	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
124	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
125	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
126	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
127	0, LINUX_SIGUSR1, LINUX_SIGUSR2
128};
129
130int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
131	SIGHUP, SIGINT, SIGQUIT, SIGILL,
132	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
133	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
134	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
135	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
136	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
137	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
138	SIGIO, SIGURG, 0
139};
140
141/*
142 * If FreeBSD & Linux have a difference of opinion about what a trap
143 * means, deal with it here.
144 *
145 * MPSAFE
146 */
147static int
148translate_traps(int signal, int trap_code)
149{
150	if (signal != SIGBUS)
151		return signal;
152	switch (trap_code) {
153	case T_PROTFLT:
154	case T_TSSFLT:
155	case T_DOUBLEFLT:
156	case T_PAGEFLT:
157		return SIGSEGV;
158	default:
159		return signal;
160	}
161}
162
163static int
164linux_fixup(register_t **stack_base, struct image_params *imgp)
165{
166	register_t *argv, *envp;
167
168	argv = *stack_base;
169	envp = *stack_base + (imgp->argc + 1);
170	(*stack_base)--;
171	**stack_base = (intptr_t)(void *)envp;
172	(*stack_base)--;
173	**stack_base = (intptr_t)(void *)argv;
174	(*stack_base)--;
175	**stack_base = imgp->argc;
176	return 0;
177}
178
179static int
180elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
181{
182	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
183	register_t *pos;
184
185	pos = *stack_base + (imgp->argc + imgp->envc + 2);
186
187	if (args->trace) {
188		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
189	}
190	if (args->execfd != -1) {
191		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
192	}
193	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
194	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
195	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
196	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
197	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
198	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
199	AUXARGS_ENTRY(pos, AT_BASE, args->base);
200	PROC_LOCK(imgp->proc);
201	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
202	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
203	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
204	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
205	PROC_UNLOCK(imgp->proc);
206	AUXARGS_ENTRY(pos, AT_NULL, 0);
207
208	free(imgp->auxargs, M_TEMP);
209	imgp->auxargs = NULL;
210
211	(*stack_base)--;
212	**stack_base = (long)imgp->argc;
213	return 0;
214}
215
216extern int _ucodesel, _udatasel;
217extern unsigned long linux_sznonrtsigcode;
218
219static void
220linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
221{
222	register struct thread *td = curthread;
223	register struct proc *p = td->td_proc;
224	register struct trapframe *regs;
225	struct l_rt_sigframe *fp, frame;
226	int oonstack;
227
228	PROC_LOCK_ASSERT(p, MA_OWNED);
229	regs = td->td_frame;
230	oonstack = sigonstack(regs->tf_esp);
231
232#ifdef DEBUG
233	if (ldebug(rt_sendsig))
234		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
235		    catcher, sig, (void*)mask, code);
236#endif
237	/*
238	 * Allocate space for the signal handler context.
239	 */
240	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
241	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
242		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
243		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
244	} else
245		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
246	PROC_UNLOCK(p);
247
248	/*
249	 * grow() will return FALSE if the fp will not fit inside the stack
250	 *	and the stack can not be grown. useracc will return FALSE
251	 *	if access is denied.
252	 */
253	if ((grow_stack (p, (int)fp) == FALSE) ||
254	    !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
255	    VM_PROT_WRITE)) {
256		/*
257		 * Process has trashed its stack; give it an illegal
258		 * instruction to halt it in its tracks.
259		 */
260		PROC_LOCK(p);
261		SIGACTION(p, SIGILL) = SIG_DFL;
262		SIGDELSET(p->p_sigignore, SIGILL);
263		SIGDELSET(p->p_sigcatch, SIGILL);
264		SIGDELSET(p->p_sigmask, SIGILL);
265#ifdef DEBUG
266		if (ldebug(rt_sendsig))
267			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
268			    fp, oonstack);
269#endif
270		psignal(p, SIGILL);
271		return;
272	}
273
274	/*
275	 * Build the argument list for the signal handler.
276	 */
277	if (p->p_sysent->sv_sigtbl)
278		if (sig <= p->p_sysent->sv_sigsize)
279			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
280
281	frame.sf_handler = catcher;
282	frame.sf_sig = sig;
283	frame.sf_siginfo = &fp->sf_si;
284	frame.sf_ucontext = &fp->sf_sc;
285
286	/* Fill siginfo structure. */
287	frame.sf_si.lsi_signo = sig;
288	frame.sf_si.lsi_code = code;
289	frame.sf_si.lsi_addr = (void *)regs->tf_err;
290
291	/*
292	 * Build the signal context to be used by sigreturn.
293	 */
294	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
295	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
296
297	PROC_LOCK(p);
298	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
299	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
300	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
301	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
302	PROC_UNLOCK(p);
303
304	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
305
306	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
307	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
308	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
309	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
310	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
311	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
312	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
313	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
314	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
315	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
316	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
317	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
318	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
319	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
320	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
321	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
322	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
323	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
324	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
325
326#ifdef DEBUG
327	if (ldebug(rt_sendsig))
328		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
329		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
330		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
331#endif
332
333	if (copyout(&frame, fp, sizeof(frame)) != 0) {
334		/*
335		 * Process has trashed its stack; give it an illegal
336		 * instruction to halt it in its tracks.
337		 */
338		PROC_LOCK(p);
339		sigexit(td, SIGILL);
340		/* NOTREACHED */
341	}
342
343	/*
344	 * Build context to run handler in.
345	 */
346	regs->tf_esp = (int)fp;
347	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
348	    linux_sznonrtsigcode;
349	regs->tf_eflags &= ~PSL_VM;
350	regs->tf_cs = _ucodesel;
351	regs->tf_ds = _udatasel;
352	regs->tf_es = _udatasel;
353	regs->tf_fs = _udatasel;
354	regs->tf_ss = _udatasel;
355	PROC_LOCK(p);
356}
357
358
359/*
360 * Send an interrupt to process.
361 *
362 * Stack is set up to allow sigcode stored
363 * in u. to call routine, followed by kcall
364 * to sigreturn routine below.  After sigreturn
365 * resets the signal mask, the stack, and the
366 * frame pointer, it returns to the user
367 * specified pc, psl.
368 */
369
370static void
371linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
372{
373	register struct thread *td = curthread;
374	register struct proc *p = td->td_proc;
375	register struct trapframe *regs;
376	struct l_sigframe *fp, frame;
377	l_sigset_t lmask;
378	int oonstack, i;
379
380	PROC_LOCK_ASSERT(p, MA_OWNED);
381	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
382		/* Signal handler installed with SA_SIGINFO. */
383		linux_rt_sendsig(catcher, sig, mask, code);
384		return;
385	}
386
387	regs = td->td_frame;
388	oonstack = sigonstack(regs->tf_esp);
389
390#ifdef DEBUG
391	if (ldebug(sendsig))
392		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
393		    catcher, sig, (void*)mask, code);
394#endif
395
396	/*
397	 * Allocate space for the signal handler context.
398	 */
399	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
400	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
401		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
402		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
403	} else
404		fp = (struct l_sigframe *)regs->tf_esp - 1;
405	PROC_UNLOCK(p);
406
407	/*
408	 * grow() will return FALSE if the fp will not fit inside the stack
409	 *	and the stack can not be grown. useracc will return FALSE
410	 *	if access is denied.
411	 */
412	if ((grow_stack (p, (int)fp) == FALSE) ||
413	    !useracc((caddr_t)fp, sizeof (struct l_sigframe),
414	    VM_PROT_WRITE)) {
415		/*
416		 * Process has trashed its stack; give it an illegal
417		 * instruction to halt it in its tracks.
418		 */
419		PROC_LOCK(p);
420		SIGACTION(p, SIGILL) = SIG_DFL;
421		SIGDELSET(p->p_sigignore, SIGILL);
422		SIGDELSET(p->p_sigcatch, SIGILL);
423		SIGDELSET(p->p_sigmask, SIGILL);
424		psignal(p, SIGILL);
425		return;
426	}
427
428	/*
429	 * Build the argument list for the signal handler.
430	 */
431	if (p->p_sysent->sv_sigtbl)
432		if (sig <= p->p_sysent->sv_sigsize)
433			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
434
435	frame.sf_handler = catcher;
436	frame.sf_sig = sig;
437
438	bsd_to_linux_sigset(mask, &lmask);
439
440	/*
441	 * Build the signal context to be used by sigreturn.
442	 */
443	frame.sf_sc.sc_mask   = lmask.__bits[0];
444	frame.sf_sc.sc_gs     = rgs();
445	frame.sf_sc.sc_fs     = regs->tf_fs;
446	frame.sf_sc.sc_es     = regs->tf_es;
447	frame.sf_sc.sc_ds     = regs->tf_ds;
448	frame.sf_sc.sc_edi    = regs->tf_edi;
449	frame.sf_sc.sc_esi    = regs->tf_esi;
450	frame.sf_sc.sc_ebp    = regs->tf_ebp;
451	frame.sf_sc.sc_ebx    = regs->tf_ebx;
452	frame.sf_sc.sc_edx    = regs->tf_edx;
453	frame.sf_sc.sc_ecx    = regs->tf_ecx;
454	frame.sf_sc.sc_eax    = regs->tf_eax;
455	frame.sf_sc.sc_eip    = regs->tf_eip;
456	frame.sf_sc.sc_cs     = regs->tf_cs;
457	frame.sf_sc.sc_eflags = regs->tf_eflags;
458	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
459	frame.sf_sc.sc_ss     = regs->tf_ss;
460	frame.sf_sc.sc_err    = regs->tf_err;
461	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
462
463	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
464
465	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
466		frame.sf_extramask[i] = lmask.__bits[i+1];
467
468	if (copyout(&frame, fp, sizeof(frame)) != 0) {
469		/*
470		 * Process has trashed its stack; give it an illegal
471		 * instruction to halt it in its tracks.
472		 */
473		PROC_LOCK(p);
474		sigexit(td, SIGILL);
475		/* NOTREACHED */
476	}
477
478	/*
479	 * Build context to run handler in.
480	 */
481	regs->tf_esp = (int)fp;
482	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
483	regs->tf_eflags &= ~PSL_VM;
484	regs->tf_cs = _ucodesel;
485	regs->tf_ds = _udatasel;
486	regs->tf_es = _udatasel;
487	regs->tf_fs = _udatasel;
488	regs->tf_ss = _udatasel;
489	PROC_LOCK(p);
490}
491
492/*
493 * System call to cleanup state after a signal
494 * has been taken.  Reset signal mask and
495 * stack state from context left by sendsig (above).
496 * Return to previous pc and psl as specified by
497 * context left by sendsig. Check carefully to
498 * make sure that the user has not modified the
499 * psl to gain improper privileges or to cause
500 * a machine fault.
501 */
502int
503linux_sigreturn(td, args)
504	struct thread *td;
505	struct linux_sigreturn_args *args;
506{
507	struct proc *p = td->td_proc;
508	struct l_sigframe frame;
509	register struct trapframe *regs;
510	l_sigset_t lmask;
511	int eflags, i;
512
513	regs = td->td_frame;
514
515#ifdef DEBUG
516	if (ldebug(sigreturn))
517		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
518#endif
519	/*
520	 * The trampoline code hands us the sigframe.
521	 * It is unsafe to keep track of it ourselves, in the event that a
522	 * program jumps out of a signal handler.
523	 */
524	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
525		return (EFAULT);
526
527	/*
528	 * Check for security violations.
529	 */
530#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
531	eflags = frame.sf_sc.sc_eflags;
532	/*
533	 * XXX do allow users to change the privileged flag PSL_RF.  The
534	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
535	 * sometimes set it there too.  tf_eflags is kept in the signal
536	 * context during signal handling and there is no other place
537	 * to remember it, so the PSL_RF bit may be corrupted by the
538	 * signal handler without us knowing.  Corruption of the PSL_RF
539	 * bit at worst causes one more or one less debugger trap, so
540	 * allowing it is fairly harmless.
541	 */
542	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
543    		return(EINVAL);
544	}
545
546	/*
547	 * Don't allow users to load a valid privileged %cs.  Let the
548	 * hardware check for invalid selectors, excess privilege in
549	 * other selectors, invalid %eip's and invalid %esp's.
550	 */
551#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
552	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
553		trapsignal(p, SIGBUS, T_PROTFLT);
554		return(EINVAL);
555	}
556
557	lmask.__bits[0] = frame.sf_sc.sc_mask;
558	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
559		lmask.__bits[i+1] = frame.sf_extramask[i];
560	PROC_LOCK(p);
561	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
562	SIG_CANTMASK(p->p_sigmask);
563	PROC_UNLOCK(p);
564
565	/*
566	 * Restore signal context.
567	 */
568	/* %gs was restored by the trampoline. */
569	regs->tf_fs     = frame.sf_sc.sc_fs;
570	regs->tf_es     = frame.sf_sc.sc_es;
571	regs->tf_ds     = frame.sf_sc.sc_ds;
572	regs->tf_edi    = frame.sf_sc.sc_edi;
573	regs->tf_esi    = frame.sf_sc.sc_esi;
574	regs->tf_ebp    = frame.sf_sc.sc_ebp;
575	regs->tf_ebx    = frame.sf_sc.sc_ebx;
576	regs->tf_edx    = frame.sf_sc.sc_edx;
577	regs->tf_ecx    = frame.sf_sc.sc_ecx;
578	regs->tf_eax    = frame.sf_sc.sc_eax;
579	regs->tf_eip    = frame.sf_sc.sc_eip;
580	regs->tf_cs     = frame.sf_sc.sc_cs;
581	regs->tf_eflags = eflags;
582	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
583	regs->tf_ss     = frame.sf_sc.sc_ss;
584
585	return (EJUSTRETURN);
586}
587
588/*
589 * System call to cleanup state after a signal
590 * has been taken.  Reset signal mask and
591 * stack state from context left by rt_sendsig (above).
592 * Return to previous pc and psl as specified by
593 * context left by sendsig. Check carefully to
594 * make sure that the user has not modified the
595 * psl to gain improper privileges or to cause
596 * a machine fault.
597 */
598int
599linux_rt_sigreturn(td, args)
600	struct thread *td;
601	struct linux_rt_sigreturn_args *args;
602{
603	struct proc *p = td->td_proc;
604	struct sigaltstack_args sasargs;
605	struct l_ucontext uc;
606	struct l_sigcontext *context;
607	l_stack_t *lss;
608	stack_t *ss;
609	register struct trapframe *regs;
610	int eflags;
611	caddr_t sg = stackgap_init();
612
613	regs = td->td_frame;
614
615#ifdef DEBUG
616	if (ldebug(rt_sigreturn))
617		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
618#endif
619	/*
620	 * The trampoline code hands us the ucontext.
621	 * It is unsafe to keep track of it ourselves, in the event that a
622	 * program jumps out of a signal handler.
623	 */
624	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
625		return (EFAULT);
626
627	context = &uc.uc_mcontext;
628
629	/*
630	 * Check for security violations.
631	 */
632#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
633	eflags = context->sc_eflags;
634	/*
635	 * XXX do allow users to change the privileged flag PSL_RF.  The
636	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
637	 * sometimes set it there too.  tf_eflags is kept in the signal
638	 * context during signal handling and there is no other place
639	 * to remember it, so the PSL_RF bit may be corrupted by the
640	 * signal handler without us knowing.  Corruption of the PSL_RF
641	 * bit at worst causes one more or one less debugger trap, so
642	 * allowing it is fairly harmless.
643	 */
644	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
645    		return(EINVAL);
646	}
647
648	/*
649	 * Don't allow users to load a valid privileged %cs.  Let the
650	 * hardware check for invalid selectors, excess privilege in
651	 * other selectors, invalid %eip's and invalid %esp's.
652	 */
653#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
654	if (!CS_SECURE(context->sc_cs)) {
655		trapsignal(p, SIGBUS, T_PROTFLT);
656		return(EINVAL);
657	}
658
659	PROC_LOCK(p);
660	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
661	SIG_CANTMASK(p->p_sigmask);
662	PROC_UNLOCK(p);
663
664	/*
665	 * Restore signal context
666	 */
667	/* %gs was restored by the trampoline. */
668	regs->tf_fs     = context->sc_fs;
669	regs->tf_es     = context->sc_es;
670	regs->tf_ds     = context->sc_ds;
671	regs->tf_edi    = context->sc_edi;
672	regs->tf_esi    = context->sc_esi;
673	regs->tf_ebp    = context->sc_ebp;
674	regs->tf_ebx    = context->sc_ebx;
675	regs->tf_edx    = context->sc_edx;
676	regs->tf_ecx    = context->sc_ecx;
677	regs->tf_eax    = context->sc_eax;
678	regs->tf_eip    = context->sc_eip;
679	regs->tf_cs     = context->sc_cs;
680	regs->tf_eflags = eflags;
681	regs->tf_esp    = context->sc_esp_at_signal;
682	regs->tf_ss     = context->sc_ss;
683
684	/*
685	 * call sigaltstack & ignore results..
686	 */
687	ss = stackgap_alloc(&sg, sizeof(stack_t));
688	lss = &uc.uc_stack;
689	ss->ss_sp = lss->ss_sp;
690	ss->ss_size = lss->ss_size;
691	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
692
693#ifdef DEBUG
694	if (ldebug(rt_sigreturn))
695		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
696		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
697#endif
698	sasargs.ss = ss;
699	sasargs.oss = NULL;
700	(void) sigaltstack(td, &sasargs);
701
702	return (EJUSTRETURN);
703}
704
705/*
706 * MPSAFE
707 */
708static void
709linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
710{
711	args[0] = tf->tf_ebx;
712	args[1] = tf->tf_ecx;
713	args[2] = tf->tf_edx;
714	args[3] = tf->tf_esi;
715	args[4] = tf->tf_edi;
716	*params = NULL;		/* no copyin */
717}
718
719/*
720 * If a linux binary is exec'ing something, try this image activator
721 * first.  We override standard shell script execution in order to
722 * be able to modify the interpreter path.  We only do this if a linux
723 * binary is doing the exec, so we do not create an EXEC module for it.
724 */
725static int	exec_linux_imgact_try __P((struct image_params *iparams));
726
727static int
728exec_linux_imgact_try(imgp)
729    struct image_params *imgp;
730{
731    const char *head = (const char *)imgp->image_header;
732    int error = -1;
733
734    /*
735     * The interpreter for shell scripts run from a linux binary needs
736     * to be located in /compat/linux if possible in order to recursively
737     * maintain linux path emulation.
738     */
739    if (((const short *)head)[0] == SHELLMAGIC) {
740	    /*
741	     * Run our normal shell image activator.  If it succeeds attempt
742	     * to use the alternate path for the interpreter.  If an alternate
743	     * path is found, use our stringspace to store it.
744	     */
745	    if ((error = exec_shell_imgact(imgp)) == 0) {
746		    char *rpath = NULL;
747
748		    linux_emul_find(&imgp->proc->p_thread, NULL,
749			imgp->interpreter_name, &rpath, 0);
750		    if (rpath != imgp->interpreter_name) {
751			    int len = strlen(rpath) + 1;
752
753			    if (len <= MAXSHELLCMDLEN) {
754				    memcpy(imgp->interpreter_name, rpath, len);
755			    }
756			    free(rpath, M_TEMP);
757		    }
758	    }
759    }
760    return(error);
761}
762
763struct sysentvec linux_sysvec = {
764	LINUX_SYS_MAXSYSCALL,
765	linux_sysent,
766	0xff,
767	LINUX_SIGTBLSZ,
768	bsd_to_linux_signal,
769	ELAST + 1,
770	bsd_to_linux_errno,
771	translate_traps,
772	linux_fixup,
773	linux_sendsig,
774	linux_sigcode,
775	&linux_szsigcode,
776	linux_prepsyscall,
777	"Linux a.out",
778	aout_coredump,
779	exec_linux_imgact_try,
780	LINUX_MINSIGSTKSZ
781};
782
783struct sysentvec elf_linux_sysvec = {
784	LINUX_SYS_MAXSYSCALL,
785	linux_sysent,
786	0xff,
787	LINUX_SIGTBLSZ,
788	bsd_to_linux_signal,
789	ELAST + 1,
790	bsd_to_linux_errno,
791	translate_traps,
792	elf_linux_fixup,
793	linux_sendsig,
794	linux_sigcode,
795	&linux_szsigcode,
796	linux_prepsyscall,
797	"Linux ELF",
798	elf_coredump,
799	exec_linux_imgact_try,
800	LINUX_MINSIGSTKSZ
801};
802
803static Elf32_Brandinfo linux_brand = {
804					ELFOSABI_LINUX,
805					"Linux",
806					"/compat/linux",
807					"/lib/ld-linux.so.1",
808					&elf_linux_sysvec
809				 };
810
811static Elf32_Brandinfo linux_glibc2brand = {
812					ELFOSABI_LINUX,
813					"Linux",
814					"/compat/linux",
815					"/lib/ld-linux.so.2",
816					&elf_linux_sysvec
817				 };
818
819Elf32_Brandinfo *linux_brandlist[] = {
820					&linux_brand,
821					&linux_glibc2brand,
822					NULL
823				};
824
825static int
826linux_elf_modevent(module_t mod, int type, void *data)
827{
828	Elf32_Brandinfo **brandinfo;
829	int error;
830	struct linux_ioctl_handler **lihp;
831
832	error = 0;
833
834	switch(type) {
835	case MOD_LOAD:
836		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
837		     ++brandinfo)
838			if (elf_insert_brand_entry(*brandinfo) < 0)
839				error = EINVAL;
840		if (error == 0) {
841			SET_FOREACH(lihp, linux_ioctl_handler_set)
842				linux_ioctl_register_handler(*lihp);
843			if (bootverbose)
844				printf("Linux ELF exec handler installed\n");
845		} else
846			printf("cannot insert Linux ELF brand handler\n");
847		break;
848	case MOD_UNLOAD:
849		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
850		     ++brandinfo)
851			if (elf_brand_inuse(*brandinfo))
852				error = EBUSY;
853		if (error == 0) {
854			for (brandinfo = &linux_brandlist[0];
855			     *brandinfo != NULL; ++brandinfo)
856				if (elf_remove_brand_entry(*brandinfo) < 0)
857					error = EINVAL;
858		}
859		if (error == 0) {
860			SET_FOREACH(lihp, linux_ioctl_handler_set)
861				linux_ioctl_unregister_handler(*lihp);
862			if (bootverbose)
863				printf("Linux ELF exec handler removed\n");
864		} else
865			printf("Could not deinstall ELF interpreter entry\n");
866		break;
867	default:
868		break;
869	}
870	return error;
871}
872
873static moduledata_t linux_elf_mod = {
874	"linuxelf",
875	linux_elf_modevent,
876	0
877};
878
879DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
880