linux_sysvec.c revision 102814
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 102814 2002-09-01 22:30:27Z iedowse $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/imgact.h>
41#include <sys/imgact_aout.h>
42#include <sys/imgact_elf.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/signalvar.h>
48#include <sys/syscallsubr.h>
49#include <sys/sysent.h>
50#include <sys/sysproto.h>
51
52#include <vm/vm.h>
53#include <vm/vm_param.h>
54#include <vm/vm_page.h>
55#include <vm/vm_extern.h>
56#include <sys/exec.h>
57#include <sys/kernel.h>
58#include <sys/module.h>
59#include <machine/cpu.h>
60#include <sys/mutex.h>
61
62#include <i386/linux/linux.h>
63#include <i386/linux/linux_proto.h>
64#include <compat/linux/linux_signal.h>
65#include <compat/linux/linux_util.h>
66
/*
 * Module metadata: the "linux" module is version 1 and declares
 * dependencies on the sysvmsg, sysvsem and sysvshm modules.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Defines the M_LINUX malloc type (description "Linux mode structures"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
73
/*
 * The characters "#!" as they read when the first two bytes of an image
 * header are loaded as one short; exec_linux_imgact_try() compares the
 * header against this value to recognise shell scripts.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321 /* #! with the byte order swapped */
#endif
79
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves.  Both names are
 * mapped to syscall 0, which is slightly less bogus than using
 * ldebug(sigreturn).
 *
 * NOTE(review): presumably ldebug(name) resolves to the matching
 * LINUX_SYS_linux_<name> constant; the macro is defined outside this
 * file -- confirm there.
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
88
/*
 * Signal code and its size, defined elsewhere; both sysentvecs below
 * point at them, and the sendsig routines compute the handler entry
 * address from the size.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, LINUX_SYS_MAXSYSCALL entries, defined elsewhere. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Set of ioctl handlers (un)registered by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
95
96static int	linux_fixup(register_t **stack_base,
97		    struct image_params *iparams);
98static int	elf_linux_fixup(register_t **stack_base,
99		    struct image_params *iparams);
100static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
101		    caddr_t *params);
102static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
103		    u_long code);
104
105/*
106 * Linux syscalls return negative errno's, we do positive and map them
107 */
108static int bsd_to_linux_errno[ELAST + 1] = {
109  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
110 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
111 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
112 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
113 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
114	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
115	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
116	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
117  	-6, -6, -43, -42, -75, -6, -84
118};
119
/*
 * Native-to-Linux signal number translation, LINUX_SIGTBLSZ entries.
 * Both sysentvecs below install it as their signal table, and the
 * sendsig routines look a signal up with _SIG_IDX(sig).
 * NOTE(review): a 0 entry presumably marks a native signal that has no
 * Linux counterpart here -- the users in this file do not test for it.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
130
/*
 * Linux-to-native signal number translation, LINUX_SIGTBLSZ entries.
 * It is not referenced in this file.
 * NOTE(review): presumably indexed the same way as bsd_to_linux_signal[]
 * by the signal conversion code elsewhere in the Linux emulation, with 0
 * meaning "no native equivalent" -- confirm against its users.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, 0
};
141
/* Value reported for native trap codes that have no entry below. */
#define LINUX_T_UNKNOWN  255

/*
 * Native trap code -> Linux trap number, indexed by the native code.
 * Each populated slot is labelled with its index and the native T_* name
 * it serves.  The table is never written, so it is const; access it only
 * through bsd_to_linux_trapcode().
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a native trap code to its Linux number.  Codes at or beyond
 * the end of the table yield LINUX_T_UNKNOWN.  The argument is evaluated
 * twice, so it must not have side effects.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) < sizeof(_bsd_to_linux_trapcode) / \
	sizeof(*_bsd_to_linux_trapcode) ? \
     _bsd_to_linux_trapcode[(code)] : \
     LINUX_T_UNKNOWN)
180
181/*
182 * If FreeBSD & Linux have a difference of opinion about what a trap
183 * means, deal with it here.
184 *
185 * MPSAFE
186 */
187static int
188translate_traps(int signal, int trap_code)
189{
190	if (signal != SIGBUS)
191		return signal;
192	switch (trap_code) {
193	case T_PROTFLT:
194	case T_TSSFLT:
195	case T_DOUBLEFLT:
196	case T_PAGEFLT:
197		return SIGSEGV;
198	default:
199		return signal;
200	}
201}
202
203static int
204linux_fixup(register_t **stack_base, struct image_params *imgp)
205{
206	register_t *argv, *envp;
207
208	argv = *stack_base;
209	envp = *stack_base + (imgp->argc + 1);
210	(*stack_base)--;
211	**stack_base = (intptr_t)(void *)envp;
212	(*stack_base)--;
213	**stack_base = (intptr_t)(void *)argv;
214	(*stack_base)--;
215	**stack_base = imgp->argc;
216	return 0;
217}
218
219static int
220elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
221{
222	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
223	register_t *pos;
224
225	pos = *stack_base + (imgp->argc + imgp->envc + 2);
226
227	if (args->trace) {
228		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
229	}
230	if (args->execfd != -1) {
231		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
232	}
233	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
234	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
235	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
236	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
237	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
238	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
239	AUXARGS_ENTRY(pos, AT_BASE, args->base);
240	PROC_LOCK(imgp->proc);
241	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
242	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
243	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
244	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
245	PROC_UNLOCK(imgp->proc);
246	AUXARGS_ENTRY(pos, AT_NULL, 0);
247
248	free(imgp->auxargs, M_TEMP);
249	imgp->auxargs = NULL;
250
251	(*stack_base)--;
252	**stack_base = (long)imgp->argc;
253	return 0;
254}
255
/*
 * Selectors that the sendsig routines load into %cs (_ucodesel) and into
 * %ds/%es/%fs/%ss (_udatasel) before a handler runs.
 */
extern int _ucodesel, _udatasel;
/*
 * Offset that linux_rt_sendsig() adds to the start of the signal code to
 * obtain the entry point used for SA_SIGINFO handlers.
 */
extern unsigned long linux_sznonrtsigcode;
258
259static void
260linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
261{
262	register struct thread *td = curthread;
263	register struct proc *p = td->td_proc;
264	register struct trapframe *regs;
265	struct l_rt_sigframe *fp, frame;
266	int oonstack;
267
268	PROC_LOCK_ASSERT(p, MA_OWNED);
269	regs = td->td_frame;
270	oonstack = sigonstack(regs->tf_esp);
271
272#ifdef DEBUG
273	if (ldebug(rt_sendsig))
274		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
275		    catcher, sig, (void*)mask, code);
276#endif
277	/*
278	 * Allocate space for the signal handler context.
279	 */
280	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
281	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
282		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
283		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
284	} else
285		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
286	PROC_UNLOCK(p);
287
288	/*
289	 * Build the argument list for the signal handler.
290	 */
291	if (p->p_sysent->sv_sigtbl)
292		if (sig <= p->p_sysent->sv_sigsize)
293			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
294
295	frame.sf_handler = catcher;
296	frame.sf_sig = sig;
297	frame.sf_siginfo = &fp->sf_si;
298	frame.sf_ucontext = &fp->sf_sc;
299
300	/* Fill siginfo structure. */
301	frame.sf_si.lsi_signo = sig;
302	frame.sf_si.lsi_code = code;
303	frame.sf_si.lsi_addr = (void *)regs->tf_err;
304
305	/*
306	 * Build the signal context to be used by sigreturn.
307	 */
308	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
309	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
310
311	PROC_LOCK(p);
312	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
313	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
314	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
315	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
316	PROC_UNLOCK(p);
317
318	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
319
320	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
321	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
322	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
323	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
324	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
325	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
326	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
327	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
328	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
329	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
330	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
331	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
332	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
333	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
334	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
335	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
336	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
337	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
338	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
339
340#ifdef DEBUG
341	if (ldebug(rt_sendsig))
342		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
343		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
344		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
345#endif
346
347	if (copyout(&frame, fp, sizeof(frame)) != 0) {
348		/*
349		 * Process has trashed its stack; give it an illegal
350		 * instruction to halt it in its tracks.
351		 */
352#ifdef DEBUG
353		if (ldebug(rt_sendsig))
354			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
355			    fp, oonstack);
356#endif
357		PROC_LOCK(p);
358		sigexit(td, SIGILL);
359	}
360
361	/*
362	 * Build context to run handler in.
363	 */
364	regs->tf_esp = (int)fp;
365	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
366	    linux_sznonrtsigcode;
367	regs->tf_eflags &= ~(PSL_T | PSL_VM);
368	regs->tf_cs = _ucodesel;
369	regs->tf_ds = _udatasel;
370	regs->tf_es = _udatasel;
371	regs->tf_fs = _udatasel;
372	regs->tf_ss = _udatasel;
373	PROC_LOCK(p);
374}
375
376
377/*
378 * Send an interrupt to process.
379 *
380 * Stack is set up to allow sigcode stored
381 * in u. to call routine, followed by kcall
382 * to sigreturn routine below.  After sigreturn
383 * resets the signal mask, the stack, and the
384 * frame pointer, it returns to the user
385 * specified pc, psl.
386 */
387
388static void
389linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
390{
391	register struct thread *td = curthread;
392	register struct proc *p = td->td_proc;
393	register struct trapframe *regs;
394	struct l_sigframe *fp, frame;
395	l_sigset_t lmask;
396	int oonstack, i;
397
398	PROC_LOCK_ASSERT(p, MA_OWNED);
399	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
400		/* Signal handler installed with SA_SIGINFO. */
401		linux_rt_sendsig(catcher, sig, mask, code);
402		return;
403	}
404
405	regs = td->td_frame;
406	oonstack = sigonstack(regs->tf_esp);
407
408#ifdef DEBUG
409	if (ldebug(sendsig))
410		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
411		    catcher, sig, (void*)mask, code);
412#endif
413
414	/*
415	 * Allocate space for the signal handler context.
416	 */
417	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
418	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
419		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
420		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
421	} else
422		fp = (struct l_sigframe *)regs->tf_esp - 1;
423	PROC_UNLOCK(p);
424
425	/*
426	 * Build the argument list for the signal handler.
427	 */
428	if (p->p_sysent->sv_sigtbl)
429		if (sig <= p->p_sysent->sv_sigsize)
430			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
431
432	frame.sf_handler = catcher;
433	frame.sf_sig = sig;
434
435	bsd_to_linux_sigset(mask, &lmask);
436
437	/*
438	 * Build the signal context to be used by sigreturn.
439	 */
440	frame.sf_sc.sc_mask   = lmask.__bits[0];
441	frame.sf_sc.sc_gs     = rgs();
442	frame.sf_sc.sc_fs     = regs->tf_fs;
443	frame.sf_sc.sc_es     = regs->tf_es;
444	frame.sf_sc.sc_ds     = regs->tf_ds;
445	frame.sf_sc.sc_edi    = regs->tf_edi;
446	frame.sf_sc.sc_esi    = regs->tf_esi;
447	frame.sf_sc.sc_ebp    = regs->tf_ebp;
448	frame.sf_sc.sc_ebx    = regs->tf_ebx;
449	frame.sf_sc.sc_edx    = regs->tf_edx;
450	frame.sf_sc.sc_ecx    = regs->tf_ecx;
451	frame.sf_sc.sc_eax    = regs->tf_eax;
452	frame.sf_sc.sc_eip    = regs->tf_eip;
453	frame.sf_sc.sc_cs     = regs->tf_cs;
454	frame.sf_sc.sc_eflags = regs->tf_eflags;
455	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
456	frame.sf_sc.sc_ss     = regs->tf_ss;
457	frame.sf_sc.sc_err    = regs->tf_err;
458	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
459
460	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
461
462	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
463		frame.sf_extramask[i] = lmask.__bits[i+1];
464
465	if (copyout(&frame, fp, sizeof(frame)) != 0) {
466		/*
467		 * Process has trashed its stack; give it an illegal
468		 * instruction to halt it in its tracks.
469		 */
470		PROC_LOCK(p);
471		sigexit(td, SIGILL);
472	}
473
474	/*
475	 * Build context to run handler in.
476	 */
477	regs->tf_esp = (int)fp;
478	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
479	regs->tf_eflags &= ~(PSL_T | PSL_VM);
480	regs->tf_cs = _ucodesel;
481	regs->tf_ds = _udatasel;
482	regs->tf_es = _udatasel;
483	regs->tf_fs = _udatasel;
484	regs->tf_ss = _udatasel;
485	PROC_LOCK(p);
486}
487
488/*
489 * System call to cleanup state after a signal
490 * has been taken.  Reset signal mask and
491 * stack state from context left by sendsig (above).
492 * Return to previous pc and psl as specified by
493 * context left by sendsig. Check carefully to
494 * make sure that the user has not modified the
495 * psl to gain improper privileges or to cause
496 * a machine fault.
497 */
498int
499linux_sigreturn(td, args)
500	struct thread *td;
501	struct linux_sigreturn_args *args;
502{
503	struct proc *p = td->td_proc;
504	struct l_sigframe frame;
505	register struct trapframe *regs;
506	l_sigset_t lmask;
507	int eflags, i;
508
509	regs = td->td_frame;
510
511#ifdef DEBUG
512	if (ldebug(sigreturn))
513		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
514#endif
515	/*
516	 * The trampoline code hands us the sigframe.
517	 * It is unsafe to keep track of it ourselves, in the event that a
518	 * program jumps out of a signal handler.
519	 */
520	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
521		return (EFAULT);
522
523	/*
524	 * Check for security violations.
525	 */
526#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
527	eflags = frame.sf_sc.sc_eflags;
528	/*
529	 * XXX do allow users to change the privileged flag PSL_RF.  The
530	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
531	 * sometimes set it there too.  tf_eflags is kept in the signal
532	 * context during signal handling and there is no other place
533	 * to remember it, so the PSL_RF bit may be corrupted by the
534	 * signal handler without us knowing.  Corruption of the PSL_RF
535	 * bit at worst causes one more or one less debugger trap, so
536	 * allowing it is fairly harmless.
537	 */
538	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
539    		return(EINVAL);
540	}
541
542	/*
543	 * Don't allow users to load a valid privileged %cs.  Let the
544	 * hardware check for invalid selectors, excess privilege in
545	 * other selectors, invalid %eip's and invalid %esp's.
546	 */
547#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
548	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
549		trapsignal(p, SIGBUS, T_PROTFLT);
550		return(EINVAL);
551	}
552
553	lmask.__bits[0] = frame.sf_sc.sc_mask;
554	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
555		lmask.__bits[i+1] = frame.sf_extramask[i];
556	PROC_LOCK(p);
557	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
558	SIG_CANTMASK(p->p_sigmask);
559	signotify(p);
560	PROC_UNLOCK(p);
561
562	/*
563	 * Restore signal context.
564	 */
565	/* %gs was restored by the trampoline. */
566	regs->tf_fs     = frame.sf_sc.sc_fs;
567	regs->tf_es     = frame.sf_sc.sc_es;
568	regs->tf_ds     = frame.sf_sc.sc_ds;
569	regs->tf_edi    = frame.sf_sc.sc_edi;
570	regs->tf_esi    = frame.sf_sc.sc_esi;
571	regs->tf_ebp    = frame.sf_sc.sc_ebp;
572	regs->tf_ebx    = frame.sf_sc.sc_ebx;
573	regs->tf_edx    = frame.sf_sc.sc_edx;
574	regs->tf_ecx    = frame.sf_sc.sc_ecx;
575	regs->tf_eax    = frame.sf_sc.sc_eax;
576	regs->tf_eip    = frame.sf_sc.sc_eip;
577	regs->tf_cs     = frame.sf_sc.sc_cs;
578	regs->tf_eflags = eflags;
579	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
580	regs->tf_ss     = frame.sf_sc.sc_ss;
581
582	return (EJUSTRETURN);
583}
584
585/*
586 * System call to cleanup state after a signal
587 * has been taken.  Reset signal mask and
588 * stack state from context left by rt_sendsig (above).
589 * Return to previous pc and psl as specified by
590 * context left by sendsig. Check carefully to
591 * make sure that the user has not modified the
592 * psl to gain improper privileges or to cause
593 * a machine fault.
594 */
595int
596linux_rt_sigreturn(td, args)
597	struct thread *td;
598	struct linux_rt_sigreturn_args *args;
599{
600	struct proc *p = td->td_proc;
601	struct l_ucontext uc;
602	struct l_sigcontext *context;
603	l_stack_t *lss;
604	stack_t ss;
605	register struct trapframe *regs;
606	int eflags;
607
608	regs = td->td_frame;
609
610#ifdef DEBUG
611	if (ldebug(rt_sigreturn))
612		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
613#endif
614	/*
615	 * The trampoline code hands us the ucontext.
616	 * It is unsafe to keep track of it ourselves, in the event that a
617	 * program jumps out of a signal handler.
618	 */
619	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
620		return (EFAULT);
621
622	context = &uc.uc_mcontext;
623
624	/*
625	 * Check for security violations.
626	 */
627#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
628	eflags = context->sc_eflags;
629	/*
630	 * XXX do allow users to change the privileged flag PSL_RF.  The
631	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
632	 * sometimes set it there too.  tf_eflags is kept in the signal
633	 * context during signal handling and there is no other place
634	 * to remember it, so the PSL_RF bit may be corrupted by the
635	 * signal handler without us knowing.  Corruption of the PSL_RF
636	 * bit at worst causes one more or one less debugger trap, so
637	 * allowing it is fairly harmless.
638	 */
639	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
640    		return(EINVAL);
641	}
642
643	/*
644	 * Don't allow users to load a valid privileged %cs.  Let the
645	 * hardware check for invalid selectors, excess privilege in
646	 * other selectors, invalid %eip's and invalid %esp's.
647	 */
648#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
649	if (!CS_SECURE(context->sc_cs)) {
650		trapsignal(p, SIGBUS, T_PROTFLT);
651		return(EINVAL);
652	}
653
654	PROC_LOCK(p);
655	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
656	SIG_CANTMASK(p->p_sigmask);
657	signotify(p);
658	PROC_UNLOCK(p);
659
660	/*
661	 * Restore signal context
662	 */
663	/* %gs was restored by the trampoline. */
664	regs->tf_fs     = context->sc_fs;
665	regs->tf_es     = context->sc_es;
666	regs->tf_ds     = context->sc_ds;
667	regs->tf_edi    = context->sc_edi;
668	regs->tf_esi    = context->sc_esi;
669	regs->tf_ebp    = context->sc_ebp;
670	regs->tf_ebx    = context->sc_ebx;
671	regs->tf_edx    = context->sc_edx;
672	regs->tf_ecx    = context->sc_ecx;
673	regs->tf_eax    = context->sc_eax;
674	regs->tf_eip    = context->sc_eip;
675	regs->tf_cs     = context->sc_cs;
676	regs->tf_eflags = eflags;
677	regs->tf_esp    = context->sc_esp_at_signal;
678	regs->tf_ss     = context->sc_ss;
679
680	/*
681	 * call sigaltstack & ignore results..
682	 */
683	lss = &uc.uc_stack;
684	ss.ss_sp = lss->ss_sp;
685	ss.ss_size = lss->ss_size;
686	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
687
688#ifdef DEBUG
689	if (ldebug(rt_sigreturn))
690		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
691		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
692#endif
693	(void)kern_sigaltstack(td, &ss, NULL);
694
695	return (EJUSTRETURN);
696}
697
698/*
699 * MPSAFE
700 */
701static void
702linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
703{
704	args[0] = tf->tf_ebx;
705	args[1] = tf->tf_ecx;
706	args[2] = tf->tf_edx;
707	args[3] = tf->tf_esi;
708	args[4] = tf->tf_edi;
709	*params = NULL;		/* no copyin */
710}
711
712/*
713 * If a linux binary is exec'ing something, try this image activator
714 * first.  We override standard shell script execution in order to
715 * be able to modify the interpreter path.  We only do this if a linux
716 * binary is doing the exec, so we do not create an EXEC module for it.
717 */
718static int	exec_linux_imgact_try(struct image_params *iparams);
719
720static int
721exec_linux_imgact_try(imgp)
722    struct image_params *imgp;
723{
724    const char *head = (const char *)imgp->image_header;
725    int error = -1;
726
727    /*
728     * The interpreter for shell scripts run from a linux binary needs
729     * to be located in /compat/linux if possible in order to recursively
730     * maintain linux path emulation.
731     */
732    if (((const short *)head)[0] == SHELLMAGIC) {
733	    /*
734	     * Run our normal shell image activator.  If it succeeds attempt
735	     * to use the alternate path for the interpreter.  If an alternate
736	     * path is found, use our stringspace to store it.
737	     */
738	    if ((error = exec_shell_imgact(imgp)) == 0) {
739		    char *rpath = NULL;
740
741		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
742			imgp->interpreter_name, &rpath, 0);
743		    if (rpath != imgp->interpreter_name) {
744			    int len = strlen(rpath) + 1;
745
746			    if (len <= MAXSHELLCMDLEN) {
747				    memcpy(imgp->interpreter_name, rpath, len);
748			    }
749			    free(rpath, M_TEMP);
750		    }
751	    }
752    }
753    return(error);
754}
755
/*
 * System entry vector named "Linux a.out".
 *
 * NOTE(review): the initializer is positional.  The trailing labels name
 * the struct sysentvec member each value is expected to fill; they are
 * taken from <sys/sysent.h>, which is not visible here -- confirm the
 * order against that header before relying on them.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* sv_size */
	linux_sysent,		/* sv_table */
	0xff,			/* sv_mask */
	LINUX_SIGTBLSZ,		/* sv_sigsize */
	bsd_to_linux_signal,	/* sv_sigtbl */
	ELAST + 1,		/* sv_errsize */
	bsd_to_linux_errno,	/* sv_errtbl */
	translate_traps,	/* sv_transtrap */
	linux_fixup,		/* sv_fixup */
	linux_sendsig,		/* sv_sendsig */
	linux_sigcode,		/* sv_sigcode */
	&linux_szsigcode,	/* sv_szsigcode */
	linux_prepsyscall,	/* sv_prepsyscall */
	"Linux a.out",		/* sv_name */
	aout_coredump,		/* sv_coredump */
	exec_linux_imgact_try,	/* sv_imgact_try */
	LINUX_MINSIGSTKSZ,	/* sv_minsigstksz */
	PAGE_SIZE,		/* sv_pagesize */
	VM_MIN_ADDRESS,		/* sv_minuser */
	VM_MAXUSER_ADDRESS,	/* sv_maxuser */
	USRSTACK,		/* sv_usrstack */
	PS_STRINGS,		/* sv_psstrings */
	VM_PROT_ALL,		/* sv_stackprot */
	exec_copyout_strings,	/* sv_copyout_strings */
	exec_setregs		/* sv_setregs */
};
783
/*
 * System entry vector named "Linux ELF"; the ELF brands below point at it.
 * It differs from linux_sysvec only in the fixup routine, the name and the
 * core dump routine.
 *
 * NOTE(review): the initializer is positional.  The trailing labels name
 * the struct sysentvec member each value is expected to fill; they are
 * taken from <sys/sysent.h>, which is not visible here -- confirm the
 * order against that header before relying on them.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* sv_size */
	linux_sysent,		/* sv_table */
	0xff,			/* sv_mask */
	LINUX_SIGTBLSZ,		/* sv_sigsize */
	bsd_to_linux_signal,	/* sv_sigtbl */
	ELAST + 1,		/* sv_errsize */
	bsd_to_linux_errno,	/* sv_errtbl */
	translate_traps,	/* sv_transtrap */
	elf_linux_fixup,	/* sv_fixup */
	linux_sendsig,		/* sv_sendsig */
	linux_sigcode,		/* sv_sigcode */
	&linux_szsigcode,	/* sv_szsigcode */
	linux_prepsyscall,	/* sv_prepsyscall */
	"Linux ELF",		/* sv_name */
	elf32_coredump,		/* sv_coredump */
	exec_linux_imgact_try,	/* sv_imgact_try */
	LINUX_MINSIGSTKSZ,	/* sv_minsigstksz */
	PAGE_SIZE,		/* sv_pagesize */
	VM_MIN_ADDRESS,		/* sv_minuser */
	VM_MAXUSER_ADDRESS,	/* sv_maxuser */
	USRSTACK,		/* sv_usrstack */
	PS_STRINGS,		/* sv_psstrings */
	VM_PROT_ALL,		/* sv_stackprot */
	exec_copyout_strings,	/* sv_copyout_strings */
	exec_setregs		/* sv_setregs */
};
811
/*
 * ELF brand descriptors for Linux binaries.  The two entries differ only
 * in the ld-linux.so path (".so.1" versus ".so.2"); both use
 * elf_linux_sysvec.
 * NOTE(review): the initializers are positional; what each slot means is
 * defined by Elf32_Brandinfo in <sys/imgact_elf.h> -- presumably OS ABI,
 * machine, brand string, emulation root, interpreter path and sysentvec.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec
				 };

static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec
				 };

/* NULL-terminated list of the brands handled by linux_elf_modevent(). */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
835
836static int
837linux_elf_modevent(module_t mod, int type, void *data)
838{
839	Elf32_Brandinfo **brandinfo;
840	int error;
841	struct linux_ioctl_handler **lihp;
842
843	error = 0;
844
845	switch(type) {
846	case MOD_LOAD:
847		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
848		     ++brandinfo)
849			if (elf32_insert_brand_entry(*brandinfo) < 0)
850				error = EINVAL;
851		if (error == 0) {
852			SET_FOREACH(lihp, linux_ioctl_handler_set)
853				linux_ioctl_register_handler(*lihp);
854			if (bootverbose)
855				printf("Linux ELF exec handler installed\n");
856		} else
857			printf("cannot insert Linux ELF brand handler\n");
858		break;
859	case MOD_UNLOAD:
860		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
861		     ++brandinfo)
862			if (elf32_brand_inuse(*brandinfo))
863				error = EBUSY;
864		if (error == 0) {
865			for (brandinfo = &linux_brandlist[0];
866			     *brandinfo != NULL; ++brandinfo)
867				if (elf32_remove_brand_entry(*brandinfo) < 0)
868					error = EINVAL;
869		}
870		if (error == 0) {
871			SET_FOREACH(lihp, linux_ioctl_handler_set)
872				linux_ioctl_unregister_handler(*lihp);
873			if (bootverbose)
874				printf("Linux ELF exec handler removed\n");
875		} else
876			printf("Could not deinstall ELF interpreter entry\n");
877		break;
878	default:
879		break;
880	}
881	return error;
882}
883
/*
 * Module description: the name "linuxelf", linux_elf_modevent() as the
 * event handler, and 0 as the last member (NOTE(review): presumably the
 * handler's private data argument -- confirm against moduledata_t).
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module at SI_SUB_EXEC with order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
891