linux_sysvec.c revision 71039
/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 71039 2001-01-14 18:16:01Z des $
 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/proc.h>
41#include <sys/sysproto.h>
42#include <sys/sysent.h>
43#include <sys/imgact.h>
44#include <sys/imgact_aout.h>
45#include <sys/imgact_elf.h>
46#include <sys/signalvar.h>
47#include <sys/malloc.h>
48#include <vm/vm.h>
49#include <vm/vm_param.h>
50#include <vm/vm_page.h>
51#include <vm/vm_extern.h>
52#include <sys/exec.h>
53#include <sys/kernel.h>
54#include <sys/module.h>
55#include <machine/cpu.h>
56
57#include <i386/linux/linux.h>
58#include <i386/linux/linux_proto.h>
59#include <compat/linux/linux_signal.h>
60#include <compat/linux/linux_util.h>
61
62MODULE_VERSION(linux, 1);
63MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
64MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
65MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
66
67MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
68
/*
 * The two bytes "#!" read as a 16-bit integer; the value depends on the
 * byte order of the machine.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC      0x2321
#else
#define SHELLMAGIC      0x2123 /* #! */
#endif
74
75extern char linux_sigcode[];
76extern int linux_szsigcode;
77
78extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
79
80extern struct linker_set linux_ioctl_handler_set;
81
82static int	linux_fixup __P((register_t **stack_base,
83				 struct image_params *iparams));
84static int	elf_linux_fixup __P((register_t **stack_base,
85				     struct image_params *iparams));
86static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
87				       u_int *code, caddr_t *params));
88static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
89				   u_long code));
90
91/*
92 * Linux syscalls return negative errno's, we do positive and map them
93 */
94static int bsd_to_linux_errno[ELAST + 1] = {
95  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
96 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
97 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
98 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
99 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
100	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
101	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
102	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
103  	-6, -6, -43, -42, -75, -6, -84
104};
105
106int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
107	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
108	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
109	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
110	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
111	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
112	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
113	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
114	0, LINUX_SIGUSR1, LINUX_SIGUSR2
115};
116
117int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
118	SIGHUP, SIGINT, SIGQUIT, SIGILL,
119	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
120	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
121	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
122	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
123	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
124	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
125	SIGIO, SIGURG, 0
126};
127
128/*
129 * If FreeBSD & Linux have a difference of opinion about what a trap
130 * means, deal with it here.
131 */
132static int
133translate_traps(int signal, int trap_code)
134{
135	if (signal != SIGBUS)
136		return signal;
137	switch (trap_code) {
138	case T_PROTFLT:
139	case T_TSSFLT:
140	case T_DOUBLEFLT:
141	case T_PAGEFLT:
142		return SIGSEGV;
143	default:
144		return signal;
145	}
146}
147
148static int
149linux_fixup(register_t **stack_base, struct image_params *imgp)
150{
151	register_t *argv, *envp;
152
153	argv = *stack_base;
154	envp = *stack_base + (imgp->argc + 1);
155	(*stack_base)--;
156	**stack_base = (intptr_t)(void *)envp;
157	(*stack_base)--;
158	**stack_base = (intptr_t)(void *)argv;
159	(*stack_base)--;
160	**stack_base = imgp->argc;
161	return 0;
162}
163
164static int
165elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
166{
167	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
168	register_t *pos;
169
170	pos = *stack_base + (imgp->argc + imgp->envc + 2);
171
172	if (args->trace) {
173		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
174	}
175	if (args->execfd != -1) {
176		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
177	}
178	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
179	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
180	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
181	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
182	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
183	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
184	AUXARGS_ENTRY(pos, AT_BASE, args->base);
185	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
186	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
187	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
188	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
189	AUXARGS_ENTRY(pos, AT_NULL, 0);
190
191	free(imgp->auxargs, M_TEMP);
192	imgp->auxargs = NULL;
193
194	(*stack_base)--;
195	**stack_base = (long)imgp->argc;
196	return 0;
197}
198
/* User code/data segment selectors loaded into the handler's trap frame. */
extern int _ucodesel;
extern int _udatasel;

/* Offset added to the trampoline address for SA_SIGINFO handlers. */
extern unsigned long _linux_sznonrtsigcode;
201
202static void
203linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
204{
205	register struct proc *p = curproc;
206	register struct trapframe *regs;
207	struct linux_rt_sigframe *fp, frame;
208	int oonstack;
209
210	regs = p->p_md.md_regs;
211	oonstack = sigonstack(regs->tf_esp);
212
213#ifdef DEBUG
214	printf("Linux-emul(%ld): linux_rt_sendsig(%p, %d, %p, %lu)\n",
215	    (long)p->p_pid, catcher, sig, (void*)mask, code);
216#endif
217	/*
218	 * Allocate space for the signal handler context.
219	 */
220	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
221	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
222		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
223		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
224	} else
225		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
226
227	/*
228	 * grow() will return FALSE if the fp will not fit inside the stack
229	 *	and the stack can not be grown. useracc will return FALSE
230	 *	if access is denied.
231	 */
232	if ((grow_stack (p, (int)fp) == FALSE) ||
233	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
234	    VM_PROT_WRITE)) {
235		/*
236		 * Process has trashed its stack; give it an illegal
237		 * instruction to halt it in its tracks.
238		 */
239		SIGACTION(p, SIGILL) = SIG_DFL;
240		SIGDELSET(p->p_sigignore, SIGILL);
241		SIGDELSET(p->p_sigcatch, SIGILL);
242		SIGDELSET(p->p_sigmask, SIGILL);
243#ifdef DEBUG
244		printf("Linux-emul(%ld): linux_rt_sendsig -- bad stack %p, "
245		    "oonstack=%x\n", (long)p->p_pid, fp, oonstack);
246#endif
247		psignal(p, SIGILL);
248		return;
249	}
250
251	/*
252	 * Build the argument list for the signal handler.
253	 */
254	if (p->p_sysent->sv_sigtbl)
255		if (sig <= p->p_sysent->sv_sigsize)
256			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
257
258	frame.sf_handler = catcher;
259	frame.sf_sig = sig;
260	frame.sf_siginfo = &fp->sf_si;
261	frame.sf_ucontext = &fp->sf_sc;
262
263	/* Fill siginfo structure. */
264	frame.sf_si.lsi_signo = sig;
265	frame.sf_si.lsi_code = code;
266	frame.sf_si.lsi_addr = (void *)regs->tf_err;
267
268	/*
269	 * Build the signal context to be used by sigreturn.
270	 */
271	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
272	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
273
274	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
275	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
276	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
277	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
278
279	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
280
281	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
282	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
283	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
284	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
285	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
286	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
287	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
288	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
289	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
290	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
291	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
292	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
293	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
294	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
295	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
296	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
297	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
298	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
299	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
300
301#ifdef DEBUG
302	printf("Linux-emul(%ld): rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, "
303	    "mask: 0x%x\n", (long)p->p_pid, frame.sf_sc.uc_stack.ss_flags,
304	    p->p_sigstk.ss_sp, p->p_sigstk.ss_size,
305	    frame.sf_sc.uc_mcontext.sc_mask);
306#endif
307
308	if (copyout(&frame, fp, sizeof(frame)) != 0) {
309		/*
310		 * Process has trashed its stack; give it an illegal
311		 * instruction to halt it in its tracks.
312		 */
313		sigexit(p, SIGILL);
314		/* NOTREACHED */
315	}
316
317	/*
318	 * Build context to run handler in.
319	 */
320	regs->tf_esp = (int)fp;
321	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
322	    _linux_sznonrtsigcode;
323	regs->tf_eflags &= ~PSL_VM;
324	regs->tf_cs = _ucodesel;
325	regs->tf_ds = _udatasel;
326	regs->tf_es = _udatasel;
327	regs->tf_fs = _udatasel;
328	load_gs(_udatasel);
329	regs->tf_ss = _udatasel;
330}
331
332
333/*
334 * Send an interrupt to process.
335 *
336 * Stack is set up to allow sigcode stored
337 * in u. to call routine, followed by kcall
338 * to sigreturn routine below.  After sigreturn
339 * resets the signal mask, the stack, and the
340 * frame pointer, it returns to the user
341 * specified pc, psl.
342 */
343
344static void
345linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
346{
347	register struct proc *p = curproc;
348	register struct trapframe *regs;
349	struct linux_sigframe *fp, frame;
350	linux_sigset_t lmask;
351	int oonstack, i;
352
353	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
354		/* Signal handler installed with SA_SIGINFO. */
355		linux_rt_sendsig(catcher, sig, mask, code);
356		return;
357	}
358
359	regs = p->p_md.md_regs;
360	oonstack = sigonstack(regs->tf_esp);
361
362#ifdef DEBUG
363	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
364	    (long)p->p_pid, catcher, sig, (void*)mask, code);
365#endif
366
367	/*
368	 * Allocate space for the signal handler context.
369	 */
370	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
371	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
372		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
373		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
374	} else
375		fp = (struct linux_sigframe *)regs->tf_esp - 1;
376
377	/*
378	 * grow() will return FALSE if the fp will not fit inside the stack
379	 *	and the stack can not be grown. useracc will return FALSE
380	 *	if access is denied.
381	 */
382	if ((grow_stack (p, (int)fp) == FALSE) ||
383	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
384	    VM_PROT_WRITE)) {
385		/*
386		 * Process has trashed its stack; give it an illegal
387		 * instruction to halt it in its tracks.
388		 */
389		SIGACTION(p, SIGILL) = SIG_DFL;
390		SIGDELSET(p->p_sigignore, SIGILL);
391		SIGDELSET(p->p_sigcatch, SIGILL);
392		SIGDELSET(p->p_sigmask, SIGILL);
393		psignal(p, SIGILL);
394		return;
395	}
396
397	/*
398	 * Build the argument list for the signal handler.
399	 */
400	if (p->p_sysent->sv_sigtbl)
401		if (sig <= p->p_sysent->sv_sigsize)
402			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
403
404	frame.sf_handler = catcher;
405	frame.sf_sig = sig;
406
407	bsd_to_linux_sigset(mask, &lmask);
408
409	/*
410	 * Build the signal context to be used by sigreturn.
411	 */
412	frame.sf_sc.sc_mask   = lmask.__bits[0];
413	frame.sf_sc.sc_gs     = rgs();
414	frame.sf_sc.sc_fs     = regs->tf_fs;
415	frame.sf_sc.sc_es     = regs->tf_es;
416	frame.sf_sc.sc_ds     = regs->tf_ds;
417	frame.sf_sc.sc_edi    = regs->tf_edi;
418	frame.sf_sc.sc_esi    = regs->tf_esi;
419	frame.sf_sc.sc_ebp    = regs->tf_ebp;
420	frame.sf_sc.sc_ebx    = regs->tf_ebx;
421	frame.sf_sc.sc_edx    = regs->tf_edx;
422	frame.sf_sc.sc_ecx    = regs->tf_ecx;
423	frame.sf_sc.sc_eax    = regs->tf_eax;
424	frame.sf_sc.sc_eip    = regs->tf_eip;
425	frame.sf_sc.sc_cs     = regs->tf_cs;
426	frame.sf_sc.sc_eflags = regs->tf_eflags;
427	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
428	frame.sf_sc.sc_ss     = regs->tf_ss;
429	frame.sf_sc.sc_err    = regs->tf_err;
430	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
431
432	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
433
434	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
435		frame.sf_extramask[i] = lmask.__bits[i+1];
436
437	if (copyout(&frame, fp, sizeof(frame)) != 0) {
438		/*
439		 * Process has trashed its stack; give it an illegal
440		 * instruction to halt it in its tracks.
441		 */
442		sigexit(p, SIGILL);
443		/* NOTREACHED */
444	}
445
446	/*
447	 * Build context to run handler in.
448	 */
449	regs->tf_esp = (int)fp;
450	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
451	regs->tf_eflags &= ~PSL_VM;
452	regs->tf_cs = _ucodesel;
453	regs->tf_ds = _udatasel;
454	regs->tf_es = _udatasel;
455	regs->tf_fs = _udatasel;
456	load_gs(_udatasel);
457	regs->tf_ss = _udatasel;
458}
459
460/*
461 * System call to cleanup state after a signal
462 * has been taken.  Reset signal mask and
463 * stack state from context left by sendsig (above).
464 * Return to previous pc and psl as specified by
465 * context left by sendsig. Check carefully to
466 * make sure that the user has not modified the
467 * psl to gain improper privileges or to cause
468 * a machine fault.
469 */
470int
471linux_sigreturn(p, args)
472	struct proc *p;
473	struct linux_sigreturn_args *args;
474{
475	struct linux_sigframe frame;
476	register struct trapframe *regs;
477	linux_sigset_t lmask;
478	int eflags, i;
479
480	regs = p->p_md.md_regs;
481
482#ifdef DEBUG
483	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
484	    (long)p->p_pid, (void *)args->sfp);
485#endif
486	/*
487	 * The trampoline code hands us the sigframe.
488	 * It is unsafe to keep track of it ourselves, in the event that a
489	 * program jumps out of a signal handler.
490	 */
491	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
492		return (EFAULT);
493
494	/*
495	 * Check for security violations.
496	 */
497#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
498	eflags = frame.sf_sc.sc_eflags;
499	/*
500	 * XXX do allow users to change the privileged flag PSL_RF.  The
501	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
502	 * sometimes set it there too.  tf_eflags is kept in the signal
503	 * context during signal handling and there is no other place
504	 * to remember it, so the PSL_RF bit may be corrupted by the
505	 * signal handler without us knowing.  Corruption of the PSL_RF
506	 * bit at worst causes one more or one less debugger trap, so
507	 * allowing it is fairly harmless.
508	 */
509	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
510    		return(EINVAL);
511	}
512
513	/*
514	 * Don't allow users to load a valid privileged %cs.  Let the
515	 * hardware check for invalid selectors, excess privilege in
516	 * other selectors, invalid %eip's and invalid %esp's.
517	 */
518#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
519	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
520		trapsignal(p, SIGBUS, T_PROTFLT);
521		return(EINVAL);
522	}
523
524	lmask.__bits[0] = frame.sf_sc.sc_mask;
525	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
526		lmask.__bits[i+1] = frame.sf_extramask[i];
527	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
528	SIG_CANTMASK(p->p_sigmask);
529
530	/*
531	 * Restore signal context.
532	 */
533	/* %gs was restored by the trampoline. */
534	regs->tf_fs     = frame.sf_sc.sc_fs;
535	regs->tf_es     = frame.sf_sc.sc_es;
536	regs->tf_ds     = frame.sf_sc.sc_ds;
537	regs->tf_edi    = frame.sf_sc.sc_edi;
538	regs->tf_esi    = frame.sf_sc.sc_esi;
539	regs->tf_ebp    = frame.sf_sc.sc_ebp;
540	regs->tf_ebx    = frame.sf_sc.sc_ebx;
541	regs->tf_edx    = frame.sf_sc.sc_edx;
542	regs->tf_ecx    = frame.sf_sc.sc_ecx;
543	regs->tf_eax    = frame.sf_sc.sc_eax;
544	regs->tf_eip    = frame.sf_sc.sc_eip;
545	regs->tf_cs     = frame.sf_sc.sc_cs;
546	regs->tf_eflags = eflags;
547	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
548	regs->tf_ss     = frame.sf_sc.sc_ss;
549
550	return (EJUSTRETURN);
551}
552
553/*
554 * System call to cleanup state after a signal
555 * has been taken.  Reset signal mask and
556 * stack state from context left by rt_sendsig (above).
557 * Return to previous pc and psl as specified by
558 * context left by sendsig. Check carefully to
559 * make sure that the user has not modified the
560 * psl to gain improper privileges or to cause
561 * a machine fault.
562 */
563int
564linux_rt_sigreturn(p, args)
565	struct proc *p;
566	struct linux_rt_sigreturn_args *args;
567{
568	struct sigaltstack_args sasargs;
569	struct linux_ucontext 	 uc;
570	struct linux_sigcontext *context;
571	linux_stack_t *lss;
572	stack_t *ss;
573	register struct trapframe *regs;
574	int eflags;
575	caddr_t sg = stackgap_init();
576
577	regs = p->p_md.md_regs;
578
579#ifdef DEBUG
580	printf("Linux-emul(%ld): linux_rt_sigreturn(%p)\n",
581	    (long)p->p_pid, (void *)args->ucp);
582#endif
583	/*
584	 * The trampoline code hands us the ucontext.
585	 * It is unsafe to keep track of it ourselves, in the event that a
586	 * program jumps out of a signal handler.
587	 */
588	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
589		return (EFAULT);
590
591	context = &uc.uc_mcontext;
592
593	/*
594	 * Check for security violations.
595	 */
596#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
597	eflags = context->sc_eflags;
598	/*
599	 * XXX do allow users to change the privileged flag PSL_RF.  The
600	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
601	 * sometimes set it there too.  tf_eflags is kept in the signal
602	 * context during signal handling and there is no other place
603	 * to remember it, so the PSL_RF bit may be corrupted by the
604	 * signal handler without us knowing.  Corruption of the PSL_RF
605	 * bit at worst causes one more or one less debugger trap, so
606	 * allowing it is fairly harmless.
607	 */
608	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
609    		return(EINVAL);
610	}
611
612	/*
613	 * Don't allow users to load a valid privileged %cs.  Let the
614	 * hardware check for invalid selectors, excess privilege in
615	 * other selectors, invalid %eip's and invalid %esp's.
616	 */
617#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
618	if (!CS_SECURE(context->sc_cs)) {
619		trapsignal(p, SIGBUS, T_PROTFLT);
620		return(EINVAL);
621	}
622
623	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
624	SIG_CANTMASK(p->p_sigmask);
625
626	/*
627	 * Restore signal context
628	 */
629	/* %gs was restored by the trampoline. */
630	regs->tf_fs     = context->sc_fs;
631	regs->tf_es     = context->sc_es;
632	regs->tf_ds     = context->sc_ds;
633	regs->tf_edi    = context->sc_edi;
634	regs->tf_esi    = context->sc_esi;
635	regs->tf_ebp    = context->sc_ebp;
636	regs->tf_ebx    = context->sc_ebx;
637	regs->tf_edx    = context->sc_edx;
638	regs->tf_ecx    = context->sc_ecx;
639	regs->tf_eax    = context->sc_eax;
640	regs->tf_eip    = context->sc_eip;
641	regs->tf_cs     = context->sc_cs;
642	regs->tf_eflags = eflags;
643	regs->tf_esp    = context->sc_esp_at_signal;
644	regs->tf_ss     = context->sc_ss;
645
646	/*
647	 * call sigaltstack & ignore results..
648	 */
649	ss = stackgap_alloc(&sg, sizeof(stack_t));
650	lss = &uc.uc_stack;
651	ss->ss_sp = lss->ss_sp;
652	ss->ss_size = lss->ss_size;
653	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
654
655#ifdef DEBUG
656	printf("Linux-emul(%ld): rt_sigret  flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
657	    (long)p->p_pid, ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
658#endif
659	sasargs.ss = ss;
660	sasargs.oss = NULL;
661	(void) sigaltstack(p, &sasargs);
662
663	return (EJUSTRETURN);
664}
665
666static void
667linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
668{
669	args[0] = tf->tf_ebx;
670	args[1] = tf->tf_ecx;
671	args[2] = tf->tf_edx;
672	args[3] = tf->tf_esi;
673	args[4] = tf->tf_edi;
674	*params = NULL;		/* no copyin */
675}
676
677/*
678 * If a linux binary is exec'ing something, try this image activator
679 * first.  We override standard shell script execution in order to
680 * be able to modify the interpreter path.  We only do this if a linux
681 * binary is doing the exec, so we do not create an EXEC module for it.
682 */
683static int	exec_linux_imgact_try __P((struct image_params *iparams));
684
685static int
686exec_linux_imgact_try(imgp)
687    struct image_params *imgp;
688{
689    const char *head = (const char *)imgp->image_header;
690    int error = -1;
691
692    /*
693     * The interpreter for shell scripts run from a linux binary needs
694     * to be located in /compat/linux if possible in order to recursively
695     * maintain linux path emulation.
696     */
697    if (((const short *)head)[0] == SHELLMAGIC) {
698	    /*
699	     * Run our normal shell image activator.  If it succeeds attempt
700	     * to use the alternate path for the interpreter.  If an alternate
701	     * path is found, use our stringspace to store it.
702	     */
703	    if ((error = exec_shell_imgact(imgp)) == 0) {
704		    char *rpath = NULL;
705
706		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
707			imgp->interpreter_name, &rpath, 0);
708		    if (rpath != imgp->interpreter_name) {
709			    int len = strlen(rpath) + 1;
710
711			    if (len <= MAXSHELLCMDLEN) {
712				memcpy(imgp->interpreter_name, rpath, len);
713			    }
714			    free(rpath, M_TEMP);
715		    }
716	    }
717    }
718    return(error);
719}
720
721struct sysentvec linux_sysvec = {
722	LINUX_SYS_MAXSYSCALL,
723	linux_sysent,
724	0xff,
725	LINUX_SIGTBLSZ,
726	bsd_to_linux_signal,
727	ELAST + 1,
728	bsd_to_linux_errno,
729	translate_traps,
730	linux_fixup,
731	linux_sendsig,
732	linux_sigcode,
733	&linux_szsigcode,
734	linux_prepsyscall,
735	"Linux a.out",
736	aout_coredump,
737	exec_linux_imgact_try,
738	LINUX_MINSIGSTKSZ
739};
740
741struct sysentvec elf_linux_sysvec = {
742	LINUX_SYS_MAXSYSCALL,
743	linux_sysent,
744	0xff,
745	LINUX_SIGTBLSZ,
746	bsd_to_linux_signal,
747	ELAST + 1,
748	bsd_to_linux_errno,
749	translate_traps,
750	elf_linux_fixup,
751	linux_sendsig,
752	linux_sigcode,
753	&linux_szsigcode,
754	linux_prepsyscall,
755	"Linux ELF",
756	elf_coredump,
757	exec_linux_imgact_try,
758	LINUX_MINSIGSTKSZ
759};
760
761static Elf32_Brandinfo linux_brand = {
762					ELFOSABI_LINUX,
763					"/compat/linux",
764					"/lib/ld-linux.so.1",
765					&elf_linux_sysvec
766				 };
767
768static Elf32_Brandinfo linux_glibc2brand = {
769					ELFOSABI_LINUX,
770					"/compat/linux",
771					"/lib/ld-linux.so.2",
772					&elf_linux_sysvec
773				 };
774
775Elf32_Brandinfo *linux_brandlist[] = {
776					&linux_brand,
777					&linux_glibc2brand,
778					NULL
779				};
780
781static int
782linux_elf_modevent(module_t mod, int type, void *data)
783{
784	Elf32_Brandinfo **brandinfo;
785	int error;
786
787	error = 0;
788
789	switch(type) {
790	case MOD_LOAD:
791		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
792		     ++brandinfo)
793			if (elf_insert_brand_entry(*brandinfo) < 0)
794				error = EINVAL;
795		if (error == 0) {
796			linux_ioctl_register_handlers(
797				&linux_ioctl_handler_set);
798			if (bootverbose)
799				printf("Linux ELF exec handler installed\n");
800		} else
801			printf("cannot insert Linux ELF brand handler\n");
802		break;
803	case MOD_UNLOAD:
804		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
805		     ++brandinfo)
806			if (elf_brand_inuse(*brandinfo))
807				error = EBUSY;
808		if (error == 0) {
809			for (brandinfo = &linux_brandlist[0];
810			     *brandinfo != NULL; ++brandinfo)
811				if (elf_remove_brand_entry(*brandinfo) < 0)
812					error = EINVAL;
813		}
814		if (error == 0) {
815			linux_ioctl_unregister_handlers(
816				&linux_ioctl_handler_set);
817			if (bootverbose)
818				printf("Linux ELF exec handler removed\n");
819		} else
820			printf("Could not deinstall ELF interpreter entry\n");
821		break;
822	default:
823		break;
824	}
825	return error;
826}
827
828static moduledata_t linux_elf_mod = {
829	"linuxelf",
830	linux_elf_modevent,
831	0
832};
833
834DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
835