linux_sysvec.c revision 68689
1/*-
 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 68689 2000-11-13 20:44:05Z gallatin $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/proc.h>
41#include <sys/sysproto.h>
42#include <sys/sysent.h>
43#include <sys/imgact.h>
44#include <sys/imgact_aout.h>
45#include <sys/imgact_elf.h>
46#include <sys/signalvar.h>
47#include <sys/malloc.h>
48#include <vm/vm.h>
49#include <vm/vm_param.h>
50#include <vm/vm_page.h>
51#include <vm/vm_extern.h>
52#include <sys/exec.h>
53#include <sys/kernel.h>
54#include <sys/module.h>
55#include <machine/cpu.h>
56
57#include <i386/linux/linux.h>
58#include <i386/linux/linux_proto.h>
59#include <compat/linux/linux_util.h>
60
61MODULE_VERSION(linux, 1);
62
63MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
64
/*
 * The two characters "#!" that open a shell script, as they appear when
 * the first two bytes of the image header are read as one 16-bit value
 * in host byte order.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC      0x2321
#else
#define SHELLMAGIC      0x2123 /* #! */
#endif
70
71extern char linux_sigcode[];
72extern int linux_szsigcode;
73
74extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
75
76extern struct linker_set linux_ioctl_handler_set;
77
78static int	linux_fixup __P((register_t **stack_base,
79				 struct image_params *iparams));
80static int	elf_linux_fixup __P((register_t **stack_base,
81				     struct image_params *iparams));
82static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
83				       u_int *code, caddr_t *params));
84static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
85				   u_long code));
86
87/*
88 * Linux syscalls return negative errno's, we do positive and map them
89 */
90static int bsd_to_linux_errno[ELAST + 1] = {
91  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
92 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
93 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
94 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
95 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
96	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
97	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
98	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
99  	-6, -6, -43, -42, -75, -6, -84
100};
101
102int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
103	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
104	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
105	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
106	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
107	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
108	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
109	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
110	0, LINUX_SIGUSR1, LINUX_SIGUSR2
111};
112
113int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
114	SIGHUP, SIGINT, SIGQUIT, SIGILL,
115	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
116	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
117	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
118	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
119	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
120	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
121	SIGIO, SIGURG, 0
122};
123
124/*
125 * If FreeBSD & Linux have a difference of opinion about what a trap
126 * means, deal with it here.
127 */
128static int
129translate_traps(int signal, int trap_code)
130{
131	if (signal != SIGBUS)
132		return signal;
133	switch (trap_code) {
134	case T_PROTFLT:
135	case T_TSSFLT:
136	case T_DOUBLEFLT:
137	case T_PAGEFLT:
138		return SIGSEGV;
139	default:
140		return signal;
141	}
142}
143
144static int
145linux_fixup(register_t **stack_base, struct image_params *imgp)
146{
147	register_t *argv, *envp;
148
149	argv = *stack_base;
150	envp = *stack_base + (imgp->argc + 1);
151	(*stack_base)--;
152	**stack_base = (intptr_t)(void *)envp;
153	(*stack_base)--;
154	**stack_base = (intptr_t)(void *)argv;
155	(*stack_base)--;
156	**stack_base = imgp->argc;
157	return 0;
158}
159
160static int
161elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
162{
163	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
164	register_t *pos;
165
166	pos = *stack_base + (imgp->argc + imgp->envc + 2);
167
168	if (args->trace) {
169		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
170	}
171	if (args->execfd != -1) {
172		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
173	}
174	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
175	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
176	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
177	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
178	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
179	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
180	AUXARGS_ENTRY(pos, AT_BASE, args->base);
181	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
182	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
183	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
184	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
185	AUXARGS_ENTRY(pos, AT_NULL, 0);
186
187	free(imgp->auxargs, M_TEMP);
188	imgp->auxargs = NULL;
189
190	(*stack_base)--;
191	**stack_base = (long)imgp->argc;
192	return 0;
193}
194
195extern int _ucodesel, _udatasel;
196extern unsigned long _linux_sznonrtsigcode;
197
198static void
199linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
200{
201	register struct proc *p = curproc;
202	register struct trapframe *regs;
203	struct linux_rt_sigframe *fp, frame;
204	struct sigacts *psp = p->p_sigacts;
205	int oonstack;
206
207	regs = p->p_md.md_regs;
208	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
209
210#ifdef DEBUG
211	printf("Linux-emul(%ld): linux_rt_sendsig(%p, %d, %p, %lu)\n",
212	    (long)p->p_pid, catcher, sig, (void*)mask, code);
213#endif
214	/*
215	 * Allocate space for the signal handler context.
216	 */
217	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
218	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
219		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
220		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
221		p->p_sigstk.ss_flags |= SS_ONSTACK;
222	} else {
223		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
224	}
225
226	/*
227	 * grow() will return FALSE if the fp will not fit inside the stack
228	 *	and the stack can not be grown. useracc will return FALSE
229	 *	if access is denied.
230	 */
231	if ((grow_stack (p, (int)fp) == FALSE) ||
232	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
233	    VM_PROT_WRITE)) {
234		/*
235		 * Process has trashed its stack; give it an illegal
236		 * instruction to halt it in its tracks.
237		 */
238		SIGACTION(p, SIGILL) = SIG_DFL;
239		SIGDELSET(p->p_sigignore, SIGILL);
240		SIGDELSET(p->p_sigcatch, SIGILL);
241		SIGDELSET(p->p_sigmask, SIGILL);
242#ifdef DEBUG
243		printf("Linux-emul(%ld): linux_rt_sendsig -- bad stack %p, SS_ONSTACK: 0x%x ",
244	    (long)p->p_pid, fp, p->p_sigstk.ss_flags & SS_ONSTACK);
245#endif
246		psignal(p, SIGILL);
247		return;
248	}
249
250	/*
251	 * Build the argument list for the signal handler.
252	 */
253	if (p->p_sysent->sv_sigtbl)
254		if (sig <= p->p_sysent->sv_sigsize)
255			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
256
257	frame.sf_handler = catcher;
258	frame.sf_sig = sig;
259
260	frame.sf_siginfo = &fp->sf_si;
261	frame.sf_ucontext = &fp->sf_sc;
262	/* Fill siginfo structure. */
263	frame.sf_si.lsi_signo = sig;
264	frame.sf_si.lsi_code = code;
265	frame.sf_si.lsi_addr = (void *)regs->tf_err;
266	/*
267	 * Build the signal context to be used by sigreturn.
268	 */
269	frame.sf_sc.uc_mcontext.sc_mask   = mask->__bits[0];
270	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
271	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
272	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
273	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
274	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
275	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
276	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
277	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
278	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
279	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
280	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
281	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
282	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
283	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
284	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
285	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
286	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
287	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
288
289	/*
290	 * Build the remainder of the ucontext struct to be used by sigreturn.
291	 */
292	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
293	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
294	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
295	frame.sf_sc.uc_stack.ss_flags =
296	    bsd_to_linux_sigaltstack(p->p_sigstk.ss_flags);
297	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
298#ifdef DEBUG
299	printf("Linux-emul(%ld): rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
300	    (long)p->p_pid, frame.sf_sc.uc_stack.ss_flags,  p->p_sigstk.ss_sp,
301	    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
302#endif
303	bsd_to_linux_sigset(&p->p_sigmask, &frame.sf_sc.uc_sigmask);
304
305	if (copyout(&frame, fp, sizeof(frame)) != 0) {
306		/*
307		 * Process has trashed its stack; give it an illegal
308		 * instruction to halt it in its tracks.
309		 */
310		sigexit(p, SIGILL);
311		/* NOTREACHED */
312	}
313
314	/*
315	 * Build context to run handler in.
316	 */
317	regs->tf_esp = (int)fp;
318	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
319	    _linux_sznonrtsigcode;
320	regs->tf_eflags &= ~PSL_VM;
321	regs->tf_cs = _ucodesel;
322	regs->tf_ds = _udatasel;
323	regs->tf_es = _udatasel;
324	regs->tf_fs = _udatasel;
325	load_gs(_udatasel);
326	regs->tf_ss = _udatasel;
327}
328
329
330/*
331 * Send an interrupt to process.
332 *
333 * Stack is set up to allow sigcode stored
334 * in u. to call routine, followed by kcall
335 * to sigreturn routine below.  After sigreturn
336 * resets the signal mask, the stack, and the
337 * frame pointer, it returns to the user
338 * specified pc, psl.
339 */
340
341static void
342linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
343{
344	register struct proc *p = curproc;
345	register struct trapframe *regs;
346	struct linux_sigframe *fp, frame;
347	struct sigacts *psp = p->p_sigacts;
348	int oonstack, i;
349
350	regs = p->p_md.md_regs;
351	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
352
353#ifdef DEBUG
354	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
355	    (long)p->p_pid, catcher, sig, (void*)mask, code);
356#endif
357
358	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
359		/* Signal handler installed with SA_SIGINFO. */
360		linux_rt_sendsig(catcher, sig, mask, code);
361		return;
362	}
363
364	/*
365	 * Allocate space for the signal handler context.
366	 */
367	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
368	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
369		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
370		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
371		p->p_sigstk.ss_flags |= SS_ONSTACK;
372	} else {
373		fp = (struct linux_sigframe *)regs->tf_esp - 1;
374	}
375
376	/*
377	 * grow() will return FALSE if the fp will not fit inside the stack
378	 *	and the stack can not be grown. useracc will return FALSE
379	 *	if access is denied.
380	 */
381	if ((grow_stack (p, (int)fp) == FALSE) ||
382	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
383	    VM_PROT_WRITE)) {
384		/*
385		 * Process has trashed its stack; give it an illegal
386		 * instruction to halt it in its tracks.
387		 */
388		SIGACTION(p, SIGILL) = SIG_DFL;
389		SIGDELSET(p->p_sigignore, SIGILL);
390		SIGDELSET(p->p_sigcatch, SIGILL);
391		SIGDELSET(p->p_sigmask, SIGILL);
392		psignal(p, SIGILL);
393		return;
394	}
395
396	/*
397	 * Build the argument list for the signal handler.
398	 */
399	if (p->p_sysent->sv_sigtbl)
400		if (sig <= p->p_sysent->sv_sigsize)
401			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
402
403	frame.sf_handler = catcher;
404	frame.sf_sig = sig;
405
406	/*
407	 * Build the signal context to be used by sigreturn.
408	 */
409	frame.sf_sc.sc_mask   = mask->__bits[0];
410	frame.sf_sc.sc_gs     = rgs();
411	frame.sf_sc.sc_fs     = regs->tf_fs;
412	frame.sf_sc.sc_es     = regs->tf_es;
413	frame.sf_sc.sc_ds     = regs->tf_ds;
414	frame.sf_sc.sc_edi    = regs->tf_edi;
415	frame.sf_sc.sc_esi    = regs->tf_esi;
416	frame.sf_sc.sc_ebp    = regs->tf_ebp;
417	frame.sf_sc.sc_ebx    = regs->tf_ebx;
418	frame.sf_sc.sc_edx    = regs->tf_edx;
419	frame.sf_sc.sc_ecx    = regs->tf_ecx;
420	frame.sf_sc.sc_eax    = regs->tf_eax;
421	frame.sf_sc.sc_eip    = regs->tf_eip;
422	frame.sf_sc.sc_cs     = regs->tf_cs;
423	frame.sf_sc.sc_eflags = regs->tf_eflags;
424	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
425	frame.sf_sc.sc_ss     = regs->tf_ss;
426	frame.sf_sc.sc_err    = regs->tf_err;
427	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
428	bzero(&frame.fpstate, sizeof(struct linux_fpstate));
429	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
430		frame.extramask[i] = mask->__bits[i+1];
431
432	if (copyout(&frame, fp, sizeof(frame)) != 0) {
433		/*
434		 * Process has trashed its stack; give it an illegal
435		 * instruction to halt it in its tracks.
436		 */
437		sigexit(p, SIGILL);
438		/* NOTREACHED */
439	}
440
441	/*
442	 * Build context to run handler in.
443	 */
444	regs->tf_esp = (int)fp;
445	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
446	regs->tf_eflags &= ~PSL_VM;
447	regs->tf_cs = _ucodesel;
448	regs->tf_ds = _udatasel;
449	regs->tf_es = _udatasel;
450	regs->tf_fs = _udatasel;
451	load_gs(_udatasel);
452	regs->tf_ss = _udatasel;
453}
454
455/*
456 * System call to cleanup state after a signal
457 * has been taken.  Reset signal mask and
458 * stack state from context left by sendsig (above).
459 * Return to previous pc and psl as specified by
460 * context left by sendsig. Check carefully to
461 * make sure that the user has not modified the
462 * psl to gain improper privileges or to cause
463 * a machine fault.
464 */
465int
466linux_sigreturn(p, args)
467	struct proc *p;
468	struct linux_sigreturn_args *args;
469{
470	struct linux_sigcontext context;
471	register struct trapframe *regs;
472	u_int extramask[LINUX_NSIG_WORDS-1];
473	u_int *emp;
474	int eflags, i;
475
476	regs = p->p_md.md_regs;
477
478#ifdef DEBUG
479	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
480	    (long)p->p_pid, (void *)args->scp);
481#endif
482	/*
483	 * The trampoline code hands us the context.
484	 * It is unsafe to keep track of it ourselves, in the event that a
485	 * program jumps out of a signal handler.
486	 */
487	if (copyin((caddr_t)args->scp, &context, sizeof(context)) != 0)
488		return (EFAULT);
489
490	/*
491	 * Check for security violations.
492	 */
493#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
494	eflags = context.sc_eflags;
495	/*
496	 * XXX do allow users to change the privileged flag PSL_RF.  The
497	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
498	 * sometimes set it there too.  tf_eflags is kept in the signal
499	 * context during signal handling and there is no other place
500	 * to remember it, so the PSL_RF bit may be corrupted by the
501	 * signal handler without us knowing.  Corruption of the PSL_RF
502	 * bit at worst causes one more or one less debugger trap, so
503	 * allowing it is fairly harmless.
504	 */
505	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
506    		return(EINVAL);
507	}
508
509	/*
510	 * Don't allow users to load a valid privileged %cs.  Let the
511	 * hardware check for invalid selectors, excess privilege in
512	 * other selectors, invalid %eip's and invalid %esp's.
513	 */
514#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
515	if (!CS_SECURE(context.sc_cs)) {
516		trapsignal(p, SIGBUS, T_PROTFLT);
517		return(EINVAL);
518	}
519
520	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
521	emp = (u_int *)((caddr_t)args->scp + sizeof(context) +
522	    sizeof(struct linux_fpstate));
523	if (copyin((caddr_t)emp, extramask, sizeof(extramask)) == 0)
524		for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
525			p->p_sigmask.__bits[i+1] = extramask[i];
526
527	SIGSETOLD(p->p_sigmask, context.sc_mask);
528	SIG_CANTMASK(p->p_sigmask);
529
530	/*
531	 * Restore signal context.
532	 */
533	/* %gs was restored by the trampoline. */
534	regs->tf_fs     = context.sc_fs;
535	regs->tf_es     = context.sc_es;
536	regs->tf_ds     = context.sc_ds;
537	regs->tf_edi    = context.sc_edi;
538	regs->tf_esi    = context.sc_esi;
539	regs->tf_ebp    = context.sc_ebp;
540	regs->tf_ebx    = context.sc_ebx;
541	regs->tf_edx    = context.sc_edx;
542	regs->tf_ecx    = context.sc_ecx;
543	regs->tf_eax    = context.sc_eax;
544	regs->tf_eip    = context.sc_eip;
545	regs->tf_cs     = context.sc_cs;
546	regs->tf_eflags = eflags;
547	regs->tf_esp    = context.sc_esp_at_signal;
548	regs->tf_ss     = context.sc_ss;
549
550	return (EJUSTRETURN);
551}
552
553/*
554 * System call to cleanup state after a signal
555 * has been taken.  Reset signal mask and
556 * stack state from context left by rt_sendsig (above).
557 * Return to previous pc and psl as specified by
558 * context left by sendsig. Check carefully to
559 * make sure that the user has not modified the
560 * psl to gain improper privileges or to cause
561 * a machine fault.
562 */
563int
564linux_rt_sigreturn(p, args)
565	struct proc *p;
566	struct linux_rt_sigreturn_args *args;
567{
568	struct sigaltstack_args sasargs;
569	struct linux_ucontext 	 uc;
570	struct linux_sigcontext *context;
571	linux_stack_t *lss;
572	stack_t *ss;
573	register struct trapframe *regs;
574	int eflags;
575	caddr_t sg = stackgap_init();
576
577	regs = p->p_md.md_regs;
578
579#ifdef DEBUG
580	printf("Linux-emul(%ld): linux_rt_sigreturn(%p)\n",
581	    (long)p->p_pid, (void *)args->ucp);
582#endif
583	/*
584	 * The trampoline code hands us the u_context.
585	 * It is unsafe to keep track of it ourselves, in the event that a
586	 * program jumps out of a signal handler.
587	 */
588	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
589		return (EFAULT);
590
591	context = &uc.uc_mcontext;
592
593	/*
594	 * Check for security violations.
595	 */
596#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
597	eflags = context->sc_eflags;
598	/*
599	 * XXX do allow users to change the privileged flag PSL_RF.  The
600	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
601	 * sometimes set it there too.  tf_eflags is kept in the signal
602	 * context during signal handling and there is no other place
603	 * to remember it, so the PSL_RF bit may be corrupted by the
604	 * signal handler without us knowing.  Corruption of the PSL_RF
605	 * bit at worst causes one more or one less debugger trap, so
606	 * allowing it is fairly harmless.
607	 */
608	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
609    		return(EINVAL);
610	}
611
612	/*
613	 * Don't allow users to load a valid privileged %cs.  Let the
614	 * hardware check for invalid selectors, excess privilege in
615	 * other selectors, invalid %eip's and invalid %esp's.
616	 */
617#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
618	if (!CS_SECURE(context->sc_cs)) {
619		trapsignal(p, SIGBUS, T_PROTFLT);
620		return(EINVAL);
621	}
622
623	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
624	SIGSETOLD(p->p_sigmask, context->sc_mask);
625	SIG_CANTMASK(p->p_sigmask);
626
627	/*
628	 * Restore signal context->
629	 */
630	/* %gs was restored by the trampoline. */
631	regs->tf_fs     = context->sc_fs;
632	regs->tf_es     = context->sc_es;
633	regs->tf_ds     = context->sc_ds;
634	regs->tf_edi    = context->sc_edi;
635	regs->tf_esi    = context->sc_esi;
636	regs->tf_ebp    = context->sc_ebp;
637	regs->tf_ebx    = context->sc_ebx;
638	regs->tf_edx    = context->sc_edx;
639	regs->tf_ecx    = context->sc_ecx;
640	regs->tf_eax    = context->sc_eax;
641	regs->tf_eip    = context->sc_eip;
642	regs->tf_cs     = context->sc_cs;
643	regs->tf_eflags = eflags;
644	regs->tf_esp    = context->sc_esp_at_signal;
645	regs->tf_ss     = context->sc_ss;
646
647	/*
648	 * call sigaltstack & ignore results..
649	 */
650	ss = stackgap_alloc(&sg, sizeof(stack_t));
651	lss = &uc.uc_stack;
652	ss->ss_sp = lss->ss_sp;
653	ss->ss_size = lss->ss_size;
654	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
655
656#ifdef DEBUG
657	printf("Linux-emul(%ld): rt_sigret  flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
658	    (long)p->p_pid, ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
659#endif
660	sasargs.ss = ss;
661	sasargs.oss = NULL;
662	(void) sigaltstack(p, &sasargs);
663
664	return (EJUSTRETURN);
665}
666
667static void
668linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
669{
670	args[0] = tf->tf_ebx;
671	args[1] = tf->tf_ecx;
672	args[2] = tf->tf_edx;
673	args[3] = tf->tf_esi;
674	args[4] = tf->tf_edi;
675	*params = NULL;		/* no copyin */
676}
677
678/*
679 * If a linux binary is exec'ing something, try this image activator
680 * first.  We override standard shell script execution in order to
681 * be able to modify the interpreter path.  We only do this if a linux
682 * binary is doing the exec, so we do not create an EXEC module for it.
683 */
684static int	exec_linux_imgact_try __P((struct image_params *iparams));
685
686static int
687exec_linux_imgact_try(imgp)
688    struct image_params *imgp;
689{
690    const char *head = (const char *)imgp->image_header;
691    int error = -1;
692
693    /*
694     * The interpreter for shell scripts run from a linux binary needs
695     * to be located in /compat/linux if possible in order to recursively
696     * maintain linux path emulation.
697     */
698    if (((const short *)head)[0] == SHELLMAGIC) {
699	    /*
700	     * Run our normal shell image activator.  If it succeeds attempt
701	     * to use the alternate path for the interpreter.  If an alternate
702	     * path is found, use our stringspace to store it.
703	     */
704	    if ((error = exec_shell_imgact(imgp)) == 0) {
705		    char *rpath = NULL;
706
707		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
708			imgp->interpreter_name, &rpath, 0);
709		    if (rpath != imgp->interpreter_name) {
710			    int len = strlen(rpath) + 1;
711
712			    if (len <= MAXSHELLCMDLEN) {
713				memcpy(imgp->interpreter_name, rpath, len);
714			    }
715			    free(rpath, M_TEMP);
716		    }
717	    }
718    }
719    return(error);
720}
721
722struct sysentvec linux_sysvec = {
723	LINUX_SYS_MAXSYSCALL,
724	linux_sysent,
725	0xff,
726	LINUX_SIGTBLSZ,
727	bsd_to_linux_signal,
728	ELAST + 1,
729	bsd_to_linux_errno,
730	translate_traps,
731	linux_fixup,
732	linux_sendsig,
733	linux_sigcode,
734	&linux_szsigcode,
735	linux_prepsyscall,
736	"Linux a.out",
737	aout_coredump,
738	exec_linux_imgact_try,
739	LINUX_MINSIGSTKSZ
740};
741
742struct sysentvec elf_linux_sysvec = {
743	LINUX_SYS_MAXSYSCALL,
744	linux_sysent,
745	0xff,
746	LINUX_SIGTBLSZ,
747	bsd_to_linux_signal,
748	ELAST + 1,
749	bsd_to_linux_errno,
750	translate_traps,
751	elf_linux_fixup,
752	linux_sendsig,
753	linux_sigcode,
754	&linux_szsigcode,
755	linux_prepsyscall,
756	"Linux ELF",
757	elf_coredump,
758	exec_linux_imgact_try,
759	LINUX_MINSIGSTKSZ
760};
761
762static Elf32_Brandinfo linux_brand = {
763					ELFOSABI_LINUX,
764					"/compat/linux",
765					"/lib/ld-linux.so.1",
766					&elf_linux_sysvec
767				 };
768
769static Elf32_Brandinfo linux_glibc2brand = {
770					ELFOSABI_LINUX,
771					"/compat/linux",
772					"/lib/ld-linux.so.2",
773					&elf_linux_sysvec
774				 };
775
776Elf32_Brandinfo *linux_brandlist[] = {
777					&linux_brand,
778					&linux_glibc2brand,
779					NULL
780				};
781
782static int
783linux_elf_modevent(module_t mod, int type, void *data)
784{
785	Elf32_Brandinfo **brandinfo;
786	int error;
787
788	error = 0;
789
790	switch(type) {
791	case MOD_LOAD:
792		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
793		     ++brandinfo)
794			if (elf_insert_brand_entry(*brandinfo) < 0)
795				error = EINVAL;
796		if (error)
797			printf("cannot insert Linux elf brand handler\n");
798		else {
799			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
800			if (bootverbose)
801				printf("Linux-ELF exec handler installed\n");
802		}
803		break;
804	case MOD_UNLOAD:
805		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
806		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
807		     ++brandinfo)
808			if (elf_brand_inuse(*brandinfo))
809				error = EBUSY;
810
811		if (error == 0) {
812			for (brandinfo = &linux_brandlist[0];
813			     *brandinfo != NULL; ++brandinfo)
814				if (elf_remove_brand_entry(*brandinfo) < 0)
815					error = EINVAL;
816		}
817		if (error)
818			printf("Could not deinstall ELF interpreter entry\n");
819		else if (bootverbose)
820			printf("Linux-elf exec handler removed\n");
821		break;
822	default:
823		break;
824	}
825	return error;
826}
827static moduledata_t linux_elf_mod = {
828	"linuxelf",
829	linux_elf_modevent,
830	0
831};
832DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
833