linux_sysvec.c revision 76827
1/*-
 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 76827 2001-05-19 01:28:09Z alfred $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/imgact.h>
41#include <sys/imgact_aout.h>
42#include <sys/imgact_elf.h>
43#include <sys/malloc.h>
44#include <sys/mutex.h>
45#include <sys/proc.h>
46#include <sys/signalvar.h>
47#include <sys/sysent.h>
48#include <sys/sysproto.h>
49
50#include <vm/vm.h>
51#include <vm/vm_param.h>
52#include <vm/vm_page.h>
53#include <vm/vm_extern.h>
54#include <sys/exec.h>
55#include <sys/kernel.h>
56#include <sys/module.h>
57#include <machine/cpu.h>
58#include <sys/lock.h>
59#include <sys/mutex.h>
60
61#include <i386/linux/linux.h>
62#include <i386/linux/linux_proto.h>
63#include <compat/linux/linux_signal.h>
64#include <compat/linux/linux_util.h>
65
66MODULE_VERSION(linux, 1);
67MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
68MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
69MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
70
71MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72
73#if BYTE_ORDER == LITTLE_ENDIAN
74#define SHELLMAGIC      0x2123 /* #! */
75#else
76#define SHELLMAGIC      0x2321
77#endif
78
79extern char linux_sigcode[];
80extern int linux_szsigcode;
81
82extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
83
84extern struct linker_set linux_ioctl_handler_set;
85
86static int	linux_fixup __P((register_t **stack_base,
87				 struct image_params *iparams));
88static int	elf_linux_fixup __P((register_t **stack_base,
89				     struct image_params *iparams));
90static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
91				       u_int *code, caddr_t *params));
92static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
93				   u_long code));
94
95/*
96 * Linux syscalls return negative errno's, we do positive and map them
97 */
98static int bsd_to_linux_errno[ELAST + 1] = {
99  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
100 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
101 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
102 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
103 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
104	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
105	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
106	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
107  	-6, -6, -43, -42, -75, -6, -84
108};
109
110int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
111	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
112	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
113	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
114	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
115	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
116	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
117	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
118	0, LINUX_SIGUSR1, LINUX_SIGUSR2
119};
120
121int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
122	SIGHUP, SIGINT, SIGQUIT, SIGILL,
123	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
124	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
125	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
126	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
127	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
128	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
129	SIGIO, SIGURG, 0
130};
131
132/*
133 * If FreeBSD & Linux have a difference of opinion about what a trap
134 * means, deal with it here.
135 */
136static int
137translate_traps(int signal, int trap_code)
138{
139	if (signal != SIGBUS)
140		return signal;
141	switch (trap_code) {
142	case T_PROTFLT:
143	case T_TSSFLT:
144	case T_DOUBLEFLT:
145	case T_PAGEFLT:
146		return SIGSEGV;
147	default:
148		return signal;
149	}
150}
151
152static int
153linux_fixup(register_t **stack_base, struct image_params *imgp)
154{
155	register_t *argv, *envp;
156
157	argv = *stack_base;
158	envp = *stack_base + (imgp->argc + 1);
159	(*stack_base)--;
160	**stack_base = (intptr_t)(void *)envp;
161	(*stack_base)--;
162	**stack_base = (intptr_t)(void *)argv;
163	(*stack_base)--;
164	**stack_base = imgp->argc;
165	return 0;
166}
167
168static int
169elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
170{
171	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
172	register_t *pos;
173
174	pos = *stack_base + (imgp->argc + imgp->envc + 2);
175
176	if (args->trace) {
177		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
178	}
179	if (args->execfd != -1) {
180		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
181	}
182	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
183	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
184	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
185	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
186	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
187	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
188	AUXARGS_ENTRY(pos, AT_BASE, args->base);
189	PROC_LOCK(imgp->proc);
190	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
191	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
192	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
193	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
194	PROC_UNLOCK(imgp->proc);
195	AUXARGS_ENTRY(pos, AT_NULL, 0);
196
197	free(imgp->auxargs, M_TEMP);
198	imgp->auxargs = NULL;
199
200	(*stack_base)--;
201	**stack_base = (long)imgp->argc;
202	return 0;
203}
204
205extern int _ucodesel, _udatasel;
206extern unsigned long linux_sznonrtsigcode;
207
208static void
209linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
210{
211	register struct proc *p = curproc;
212	register struct trapframe *regs;
213	struct linux_rt_sigframe *fp, frame;
214	int oonstack;
215
216	regs = p->p_md.md_regs;
217	oonstack = sigonstack(regs->tf_esp);
218
219#ifdef DEBUG
220	if (ldebug(sigreturn))
221		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
222		    catcher, sig, (void*)mask, code);
223#endif
224	/*
225	 * Allocate space for the signal handler context.
226	 */
227	PROC_LOCK(p);
228	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
229	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
230		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
231		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
232	} else
233		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
234	PROC_UNLOCK(p);
235
236	/*
237	 * grow() will return FALSE if the fp will not fit inside the stack
238	 *	and the stack can not be grown. useracc will return FALSE
239	 *	if access is denied.
240	 */
241	if ((grow_stack (p, (int)fp) == FALSE) ||
242	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
243	    VM_PROT_WRITE)) {
244		/*
245		 * Process has trashed its stack; give it an illegal
246		 * instruction to halt it in its tracks.
247		 */
248		PROC_LOCK(p);
249		SIGACTION(p, SIGILL) = SIG_DFL;
250		SIGDELSET(p->p_sigignore, SIGILL);
251		SIGDELSET(p->p_sigcatch, SIGILL);
252		SIGDELSET(p->p_sigmask, SIGILL);
253#ifdef DEBUG
254		if (ldebug(sigreturn))
255			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
256			    fp, oonstack);
257#endif
258		psignal(p, SIGILL);
259		PROC_UNLOCK(p);
260		return;
261	}
262
263	/*
264	 * Build the argument list for the signal handler.
265	 */
266	if (p->p_sysent->sv_sigtbl)
267		if (sig <= p->p_sysent->sv_sigsize)
268			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
269
270	frame.sf_handler = catcher;
271	frame.sf_sig = sig;
272	frame.sf_siginfo = &fp->sf_si;
273	frame.sf_ucontext = &fp->sf_sc;
274
275	/* Fill siginfo structure. */
276	frame.sf_si.lsi_signo = sig;
277	frame.sf_si.lsi_code = code;
278	frame.sf_si.lsi_addr = (void *)regs->tf_err;
279
280	/*
281	 * Build the signal context to be used by sigreturn.
282	 */
283	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
284	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
285
286	PROC_LOCK(p);
287	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
288	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
289	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
290	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
291	PROC_UNLOCK(p);
292
293	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
294
295	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
296	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
297	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
298	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
299	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
300	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
301	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
302	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
303	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
304	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
305	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
306	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
307	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
308	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
309	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
310	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
311	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
312	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
313	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
314
315#ifdef DEBUG
316	if (ldebug(sigreturn))
317		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
318		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
319		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
320#endif
321
322	if (copyout(&frame, fp, sizeof(frame)) != 0) {
323		/*
324		 * Process has trashed its stack; give it an illegal
325		 * instruction to halt it in its tracks.
326		 */
327		PROC_LOCK(p);
328		sigexit(p, SIGILL);
329		/* NOTREACHED */
330	}
331
332	/*
333	 * Build context to run handler in.
334	 */
335	regs->tf_esp = (int)fp;
336	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
337	    linux_sznonrtsigcode;
338	regs->tf_eflags &= ~PSL_VM;
339	regs->tf_cs = _ucodesel;
340	regs->tf_ds = _udatasel;
341	regs->tf_es = _udatasel;
342	regs->tf_fs = _udatasel;
343	regs->tf_ss = _udatasel;
344}
345
346
347/*
348 * Send an interrupt to process.
349 *
350 * Stack is set up to allow sigcode stored
351 * in u. to call routine, followed by kcall
352 * to sigreturn routine below.  After sigreturn
353 * resets the signal mask, the stack, and the
354 * frame pointer, it returns to the user
355 * specified pc, psl.
356 */
357
358static void
359linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
360{
361	register struct proc *p = curproc;
362	register struct trapframe *regs;
363	struct linux_sigframe *fp, frame;
364	linux_sigset_t lmask;
365	int oonstack, i;
366
367	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
368		/* Signal handler installed with SA_SIGINFO. */
369		linux_rt_sendsig(catcher, sig, mask, code);
370		return;
371	}
372
373	regs = p->p_md.md_regs;
374	oonstack = sigonstack(regs->tf_esp);
375
376#ifdef DEBUG
377	if (ldebug(sigreturn))
378		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
379		    catcher, sig, (void*)mask, code);
380#endif
381
382	/*
383	 * Allocate space for the signal handler context.
384	 */
385	PROC_LOCK(p);
386	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
387	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
388		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
389		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
390	} else
391		fp = (struct linux_sigframe *)regs->tf_esp - 1;
392	PROC_UNLOCK(p);
393
394	/*
395	 * grow() will return FALSE if the fp will not fit inside the stack
396	 *	and the stack can not be grown. useracc will return FALSE
397	 *	if access is denied.
398	 */
399	if ((grow_stack (p, (int)fp) == FALSE) ||
400	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
401	    VM_PROT_WRITE)) {
402		/*
403		 * Process has trashed its stack; give it an illegal
404		 * instruction to halt it in its tracks.
405		 */
406		PROC_LOCK(p);
407		SIGACTION(p, SIGILL) = SIG_DFL;
408		SIGDELSET(p->p_sigignore, SIGILL);
409		SIGDELSET(p->p_sigcatch, SIGILL);
410		SIGDELSET(p->p_sigmask, SIGILL);
411		psignal(p, SIGILL);
412		PROC_UNLOCK(p);
413		return;
414	}
415
416	/*
417	 * Build the argument list for the signal handler.
418	 */
419	if (p->p_sysent->sv_sigtbl)
420		if (sig <= p->p_sysent->sv_sigsize)
421			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
422
423	frame.sf_handler = catcher;
424	frame.sf_sig = sig;
425
426	bsd_to_linux_sigset(mask, &lmask);
427
428	/*
429	 * Build the signal context to be used by sigreturn.
430	 */
431	frame.sf_sc.sc_mask   = lmask.__bits[0];
432	frame.sf_sc.sc_gs     = rgs();
433	frame.sf_sc.sc_fs     = regs->tf_fs;
434	frame.sf_sc.sc_es     = regs->tf_es;
435	frame.sf_sc.sc_ds     = regs->tf_ds;
436	frame.sf_sc.sc_edi    = regs->tf_edi;
437	frame.sf_sc.sc_esi    = regs->tf_esi;
438	frame.sf_sc.sc_ebp    = regs->tf_ebp;
439	frame.sf_sc.sc_ebx    = regs->tf_ebx;
440	frame.sf_sc.sc_edx    = regs->tf_edx;
441	frame.sf_sc.sc_ecx    = regs->tf_ecx;
442	frame.sf_sc.sc_eax    = regs->tf_eax;
443	frame.sf_sc.sc_eip    = regs->tf_eip;
444	frame.sf_sc.sc_cs     = regs->tf_cs;
445	frame.sf_sc.sc_eflags = regs->tf_eflags;
446	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
447	frame.sf_sc.sc_ss     = regs->tf_ss;
448	frame.sf_sc.sc_err    = regs->tf_err;
449	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
450
451	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
452
453	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
454		frame.sf_extramask[i] = lmask.__bits[i+1];
455
456	if (copyout(&frame, fp, sizeof(frame)) != 0) {
457		/*
458		 * Process has trashed its stack; give it an illegal
459		 * instruction to halt it in its tracks.
460		 */
461		PROC_LOCK(p);
462		sigexit(p, SIGILL);
463		/* NOTREACHED */
464	}
465
466	/*
467	 * Build context to run handler in.
468	 */
469	regs->tf_esp = (int)fp;
470	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
471	regs->tf_eflags &= ~PSL_VM;
472	regs->tf_cs = _ucodesel;
473	regs->tf_ds = _udatasel;
474	regs->tf_es = _udatasel;
475	regs->tf_fs = _udatasel;
476	regs->tf_ss = _udatasel;
477}
478
479/*
480 * System call to cleanup state after a signal
481 * has been taken.  Reset signal mask and
482 * stack state from context left by sendsig (above).
483 * Return to previous pc and psl as specified by
484 * context left by sendsig. Check carefully to
485 * make sure that the user has not modified the
486 * psl to gain improper privileges or to cause
487 * a machine fault.
488 */
489int
490linux_sigreturn(p, args)
491	struct proc *p;
492	struct linux_sigreturn_args *args;
493{
494	struct linux_sigframe frame;
495	register struct trapframe *regs;
496	linux_sigset_t lmask;
497	int eflags, i;
498
499	regs = p->p_md.md_regs;
500
501#ifdef DEBUG
502	if (ldebug(sigreturn))
503		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
504#endif
505	/*
506	 * The trampoline code hands us the sigframe.
507	 * It is unsafe to keep track of it ourselves, in the event that a
508	 * program jumps out of a signal handler.
509	 */
510	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
511		return (EFAULT);
512
513	/*
514	 * Check for security violations.
515	 */
516#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
517	eflags = frame.sf_sc.sc_eflags;
518	/*
519	 * XXX do allow users to change the privileged flag PSL_RF.  The
520	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
521	 * sometimes set it there too.  tf_eflags is kept in the signal
522	 * context during signal handling and there is no other place
523	 * to remember it, so the PSL_RF bit may be corrupted by the
524	 * signal handler without us knowing.  Corruption of the PSL_RF
525	 * bit at worst causes one more or one less debugger trap, so
526	 * allowing it is fairly harmless.
527	 */
528	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
529    		return(EINVAL);
530	}
531
532	/*
533	 * Don't allow users to load a valid privileged %cs.  Let the
534	 * hardware check for invalid selectors, excess privilege in
535	 * other selectors, invalid %eip's and invalid %esp's.
536	 */
537#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
538	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
539		trapsignal(p, SIGBUS, T_PROTFLT);
540		return(EINVAL);
541	}
542
543	lmask.__bits[0] = frame.sf_sc.sc_mask;
544	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
545		lmask.__bits[i+1] = frame.sf_extramask[i];
546	PROC_LOCK(p);
547	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
548	SIG_CANTMASK(p->p_sigmask);
549	PROC_UNLOCK(p);
550
551	/*
552	 * Restore signal context.
553	 */
554	/* %gs was restored by the trampoline. */
555	regs->tf_fs     = frame.sf_sc.sc_fs;
556	regs->tf_es     = frame.sf_sc.sc_es;
557	regs->tf_ds     = frame.sf_sc.sc_ds;
558	regs->tf_edi    = frame.sf_sc.sc_edi;
559	regs->tf_esi    = frame.sf_sc.sc_esi;
560	regs->tf_ebp    = frame.sf_sc.sc_ebp;
561	regs->tf_ebx    = frame.sf_sc.sc_ebx;
562	regs->tf_edx    = frame.sf_sc.sc_edx;
563	regs->tf_ecx    = frame.sf_sc.sc_ecx;
564	regs->tf_eax    = frame.sf_sc.sc_eax;
565	regs->tf_eip    = frame.sf_sc.sc_eip;
566	regs->tf_cs     = frame.sf_sc.sc_cs;
567	regs->tf_eflags = eflags;
568	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
569	regs->tf_ss     = frame.sf_sc.sc_ss;
570
571	return (EJUSTRETURN);
572}
573
574/*
575 * System call to cleanup state after a signal
576 * has been taken.  Reset signal mask and
577 * stack state from context left by rt_sendsig (above).
578 * Return to previous pc and psl as specified by
579 * context left by sendsig. Check carefully to
580 * make sure that the user has not modified the
581 * psl to gain improper privileges or to cause
582 * a machine fault.
583 */
584int
585linux_rt_sigreturn(p, args)
586	struct proc *p;
587	struct linux_rt_sigreturn_args *args;
588{
589	struct sigaltstack_args sasargs;
590	struct linux_ucontext 	 uc;
591	struct linux_sigcontext *context;
592	linux_stack_t *lss;
593	stack_t *ss;
594	register struct trapframe *regs;
595	int eflags;
596	caddr_t sg = stackgap_init();
597
598	regs = p->p_md.md_regs;
599
600#ifdef DEBUG
601	if (ldebug(rt_sigreturn))
602		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
603#endif
604	/*
605	 * The trampoline code hands us the ucontext.
606	 * It is unsafe to keep track of it ourselves, in the event that a
607	 * program jumps out of a signal handler.
608	 */
609	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
610		return (EFAULT);
611
612	context = &uc.uc_mcontext;
613
614	/*
615	 * Check for security violations.
616	 */
617#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
618	eflags = context->sc_eflags;
619	/*
620	 * XXX do allow users to change the privileged flag PSL_RF.  The
621	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
622	 * sometimes set it there too.  tf_eflags is kept in the signal
623	 * context during signal handling and there is no other place
624	 * to remember it, so the PSL_RF bit may be corrupted by the
625	 * signal handler without us knowing.  Corruption of the PSL_RF
626	 * bit at worst causes one more or one less debugger trap, so
627	 * allowing it is fairly harmless.
628	 */
629	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
630    		return(EINVAL);
631	}
632
633	/*
634	 * Don't allow users to load a valid privileged %cs.  Let the
635	 * hardware check for invalid selectors, excess privilege in
636	 * other selectors, invalid %eip's and invalid %esp's.
637	 */
638#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
639	if (!CS_SECURE(context->sc_cs)) {
640		trapsignal(p, SIGBUS, T_PROTFLT);
641		return(EINVAL);
642	}
643
644	PROC_LOCK(p);
645	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
646	SIG_CANTMASK(p->p_sigmask);
647	PROC_UNLOCK(p);
648
649	/*
650	 * Restore signal context
651	 */
652	/* %gs was restored by the trampoline. */
653	regs->tf_fs     = context->sc_fs;
654	regs->tf_es     = context->sc_es;
655	regs->tf_ds     = context->sc_ds;
656	regs->tf_edi    = context->sc_edi;
657	regs->tf_esi    = context->sc_esi;
658	regs->tf_ebp    = context->sc_ebp;
659	regs->tf_ebx    = context->sc_ebx;
660	regs->tf_edx    = context->sc_edx;
661	regs->tf_ecx    = context->sc_ecx;
662	regs->tf_eax    = context->sc_eax;
663	regs->tf_eip    = context->sc_eip;
664	regs->tf_cs     = context->sc_cs;
665	regs->tf_eflags = eflags;
666	regs->tf_esp    = context->sc_esp_at_signal;
667	regs->tf_ss     = context->sc_ss;
668
669	/*
670	 * call sigaltstack & ignore results..
671	 */
672	ss = stackgap_alloc(&sg, sizeof(stack_t));
673	lss = &uc.uc_stack;
674	ss->ss_sp = lss->ss_sp;
675	ss->ss_size = lss->ss_size;
676	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
677
678#ifdef DEBUG
679	if (ldebug(rt_sigreturn))
680		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
681		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
682#endif
683	sasargs.ss = ss;
684	sasargs.oss = NULL;
685	(void) sigaltstack(p, &sasargs);
686
687	return (EJUSTRETURN);
688}
689
690static void
691linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
692{
693	args[0] = tf->tf_ebx;
694	args[1] = tf->tf_ecx;
695	args[2] = tf->tf_edx;
696	args[3] = tf->tf_esi;
697	args[4] = tf->tf_edi;
698	*params = NULL;		/* no copyin */
699}
700
701/*
702 * If a linux binary is exec'ing something, try this image activator
703 * first.  We override standard shell script execution in order to
704 * be able to modify the interpreter path.  We only do this if a linux
705 * binary is doing the exec, so we do not create an EXEC module for it.
706 */
707static int	exec_linux_imgact_try __P((struct image_params *iparams));
708
709static int
710exec_linux_imgact_try(imgp)
711    struct image_params *imgp;
712{
713    const char *head = (const char *)imgp->image_header;
714    int error = -1;
715
716    /*
717     * The interpreter for shell scripts run from a linux binary needs
718     * to be located in /compat/linux if possible in order to recursively
719     * maintain linux path emulation.
720     */
721    if (((const short *)head)[0] == SHELLMAGIC) {
722	    /*
723	     * Run our normal shell image activator.  If it succeeds attempt
724	     * to use the alternate path for the interpreter.  If an alternate
725	     * path is found, use our stringspace to store it.
726	     */
727	    if ((error = exec_shell_imgact(imgp)) == 0) {
728		    char *rpath = NULL;
729
730		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
731			imgp->interpreter_name, &rpath, 0);
732		    if (rpath != imgp->interpreter_name) {
733			    int len = strlen(rpath) + 1;
734
735			    if (len <= MAXSHELLCMDLEN) {
736				memcpy(imgp->interpreter_name, rpath, len);
737			    }
738			    free(rpath, M_TEMP);
739		    }
740	    }
741    }
742    return(error);
743}
744
745struct sysentvec linux_sysvec = {
746	LINUX_SYS_MAXSYSCALL,
747	linux_sysent,
748	0xff,
749	LINUX_SIGTBLSZ,
750	bsd_to_linux_signal,
751	ELAST + 1,
752	bsd_to_linux_errno,
753	translate_traps,
754	linux_fixup,
755	linux_sendsig,
756	linux_sigcode,
757	&linux_szsigcode,
758	linux_prepsyscall,
759	"Linux a.out",
760	aout_coredump,
761	exec_linux_imgact_try,
762	LINUX_MINSIGSTKSZ
763};
764
765struct sysentvec elf_linux_sysvec = {
766	LINUX_SYS_MAXSYSCALL,
767	linux_sysent,
768	0xff,
769	LINUX_SIGTBLSZ,
770	bsd_to_linux_signal,
771	ELAST + 1,
772	bsd_to_linux_errno,
773	translate_traps,
774	elf_linux_fixup,
775	linux_sendsig,
776	linux_sigcode,
777	&linux_szsigcode,
778	linux_prepsyscall,
779	"Linux ELF",
780	elf_coredump,
781	exec_linux_imgact_try,
782	LINUX_MINSIGSTKSZ
783};
784
785static Elf32_Brandinfo linux_brand = {
786					ELFOSABI_LINUX,
787					"Linux",
788					"/compat/linux",
789					"/lib/ld-linux.so.1",
790					&elf_linux_sysvec
791				 };
792
793static Elf32_Brandinfo linux_glibc2brand = {
794					ELFOSABI_LINUX,
795					"Linux",
796					"/compat/linux",
797					"/lib/ld-linux.so.2",
798					&elf_linux_sysvec
799				 };
800
801Elf32_Brandinfo *linux_brandlist[] = {
802					&linux_brand,
803					&linux_glibc2brand,
804					NULL
805				};
806
807static int
808linux_elf_modevent(module_t mod, int type, void *data)
809{
810	Elf32_Brandinfo **brandinfo;
811	int error;
812
813	error = 0;
814
815	switch(type) {
816	case MOD_LOAD:
817		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
818		     ++brandinfo)
819			if (elf_insert_brand_entry(*brandinfo) < 0)
820				error = EINVAL;
821		if (error == 0) {
822			linux_ioctl_register_handlers(
823				&linux_ioctl_handler_set);
824			if (bootverbose)
825				printf("Linux ELF exec handler installed\n");
826		} else
827			printf("cannot insert Linux ELF brand handler\n");
828		break;
829	case MOD_UNLOAD:
830		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
831		     ++brandinfo)
832			if (elf_brand_inuse(*brandinfo))
833				error = EBUSY;
834		if (error == 0) {
835			for (brandinfo = &linux_brandlist[0];
836			     *brandinfo != NULL; ++brandinfo)
837				if (elf_remove_brand_entry(*brandinfo) < 0)
838					error = EINVAL;
839		}
840		if (error == 0) {
841			linux_ioctl_unregister_handlers(
842				&linux_ioctl_handler_set);
843			if (bootverbose)
844				printf("Linux ELF exec handler removed\n");
845		} else
846			printf("Could not deinstall ELF interpreter entry\n");
847		break;
848	default:
849		break;
850	}
851	return error;
852}
853
854static moduledata_t linux_elf_mod = {
855	"linuxelf",
856	linux_elf_modevent,
857	0
858};
859
860DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
861