linux_sysvec.c revision 86647
/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 86647 2001-11-20 09:39:31Z pb $
 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/imgact.h>
41#include <sys/imgact_aout.h>
42#include <sys/imgact_elf.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/signalvar.h>
48#include <sys/sysent.h>
49#include <sys/sysproto.h>
50
51#include <vm/vm.h>
52#include <vm/vm_param.h>
53#include <vm/vm_page.h>
54#include <vm/vm_extern.h>
55#include <sys/exec.h>
56#include <sys/kernel.h>
57#include <sys/module.h>
58#include <machine/cpu.h>
59#include <sys/mutex.h>
60
61#include <i386/linux/linux.h>
62#include <i386/linux/linux_proto.h>
63#include <compat/linux/linux_signal.h>
64#include <compat/linux/linux_util.h>
65
66MODULE_VERSION(linux, 1);
67MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
68MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
69MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
70
71MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72
73#if BYTE_ORDER == LITTLE_ENDIAN
74#define SHELLMAGIC      0x2123 /* #! */
75#else
76#define SHELLMAGIC      0x2321
77#endif
78
79/*
80 * Allow the sendsig functions to use the ldebug() facility
81 * even though they are not syscalls themselves. Map them
82 * to syscall 0. This is slightly less bogus than using
83 * ldebug(sigreturn).
84 */
85#define	LINUX_SYS_linux_rt_sendsig	0
86#define	LINUX_SYS_linux_sendsig		0
87
88extern char linux_sigcode[];
89extern int linux_szsigcode;
90
91extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
92
93SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
94
95static int	linux_fixup __P((register_t **stack_base,
96				 struct image_params *iparams));
97static int	elf_linux_fixup __P((register_t **stack_base,
98				     struct image_params *iparams));
99static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
100				       u_int *code, caddr_t *params));
101static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
102				   u_long code));
103
104/*
105 * Linux syscalls return negative errno's, we do positive and map them
106 */
107static int bsd_to_linux_errno[ELAST + 1] = {
108  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
109 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
110 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
111 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
112 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
113	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
114	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
115	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
116  	-6, -6, -43, -42, -75, -6, -84
117};
118
119int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
120	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
121	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
122	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
123	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
124	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
125	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
126	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
127	0, LINUX_SIGUSR1, LINUX_SIGUSR2
128};
129
130int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
131	SIGHUP, SIGINT, SIGQUIT, SIGILL,
132	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
133	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
134	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
135	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
136	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
137	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
138	SIGIO, SIGURG, 0
139};
140
/* Value reported for trap codes that have no Linux counterpart below. */
#define LINUX_T_UNKNOWN  255

/*
 * Linux trap number for each native trap code.  The array index is the
 * native code; its number and name are given in front of every entry.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0              */ LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT  */ 6,
	/*  2              */ LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT     */ 3,
	/*  4              */ LINUX_T_UNKNOWN,
	/*  5              */ LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP  */ 16,
	/*  7 T_ASTFLT     */ 254,
	/*  8              */ LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT    */ 13,
	/* 10 T_TRCTRAP    */ 1,
	/* 11              */ LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT    */ 14,
	/* 13              */ LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT   */ 17,
	/* 15              */ LINUX_T_UNKNOWN,
	/* 16              */ LINUX_T_UNKNOWN,
	/* 17              */ LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE     */ 0,
	/* 19 T_NMI        */ 2,
	/* 20 T_OFLOW      */ 4,
	/* 21 T_BOUND      */ 5,
	/* 22 T_DNA        */ 7,
	/* 23 T_DOUBLEFLT  */ 8,
	/* 24 T_FPOPFLT    */ 9,
	/* 25 T_TSSFLT     */ 10,
	/* 26 T_SEGNPFLT   */ 11,
	/* 27 T_STKFLT     */ 12,
	/* 28 T_MCHK       */ 18,
	/* 29 T_XMMFLT     */ 19,
	/* 30 T_RESERVED   */ 15
};

/*
 * Translate a native trap code, yielding LINUX_T_UNKNOWN for any code
 * that lies beyond the end of the table.  The argument may be evaluated
 * twice, so it must be free of side effects.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) >= sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0]) ? \
     LINUX_T_UNKNOWN : \
     _bsd_to_linux_trapcode[(code)])
179
180/*
181 * If FreeBSD & Linux have a difference of opinion about what a trap
182 * means, deal with it here.
183 *
184 * MPSAFE
185 */
186static int
187translate_traps(int signal, int trap_code)
188{
189	if (signal != SIGBUS)
190		return signal;
191	switch (trap_code) {
192	case T_PROTFLT:
193	case T_TSSFLT:
194	case T_DOUBLEFLT:
195	case T_PAGEFLT:
196		return SIGSEGV;
197	default:
198		return signal;
199	}
200}
201
202static int
203linux_fixup(register_t **stack_base, struct image_params *imgp)
204{
205	register_t *argv, *envp;
206
207	argv = *stack_base;
208	envp = *stack_base + (imgp->argc + 1);
209	(*stack_base)--;
210	**stack_base = (intptr_t)(void *)envp;
211	(*stack_base)--;
212	**stack_base = (intptr_t)(void *)argv;
213	(*stack_base)--;
214	**stack_base = imgp->argc;
215	return 0;
216}
217
218static int
219elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
220{
221	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
222	register_t *pos;
223
224	pos = *stack_base + (imgp->argc + imgp->envc + 2);
225
226	if (args->trace) {
227		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
228	}
229	if (args->execfd != -1) {
230		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
231	}
232	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
233	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
234	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
235	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
236	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
237	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
238	AUXARGS_ENTRY(pos, AT_BASE, args->base);
239	PROC_LOCK(imgp->proc);
240	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
241	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
242	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
243	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
244	PROC_UNLOCK(imgp->proc);
245	AUXARGS_ENTRY(pos, AT_NULL, 0);
246
247	free(imgp->auxargs, M_TEMP);
248	imgp->auxargs = NULL;
249
250	(*stack_base)--;
251	**stack_base = (long)imgp->argc;
252	return 0;
253}
254
/* Selectors loaded into %cs and the data segment registers for a handler. */
extern int _ucodesel;
extern int _udatasel;
/* Added to the start of the sigcode to reach the rt signal trampoline. */
extern unsigned long linux_sznonrtsigcode;
257
258static void
259linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
260{
261	register struct thread *td = curthread;
262	register struct proc *p = td->td_proc;
263	register struct trapframe *regs;
264	struct l_rt_sigframe *fp, frame;
265	int oonstack;
266
267	PROC_LOCK_ASSERT(p, MA_OWNED);
268	regs = td->td_frame;
269	oonstack = sigonstack(regs->tf_esp);
270
271#ifdef DEBUG
272	if (ldebug(rt_sendsig))
273		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
274		    catcher, sig, (void*)mask, code);
275#endif
276	/*
277	 * Allocate space for the signal handler context.
278	 */
279	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
280	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
281		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
282		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
283	} else
284		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
285	PROC_UNLOCK(p);
286
287	/*
288	 * grow() will return FALSE if the fp will not fit inside the stack
289	 *	and the stack can not be grown. useracc will return FALSE
290	 *	if access is denied.
291	 */
292	if ((grow_stack (p, (int)fp) == FALSE) ||
293	    !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
294	    VM_PROT_WRITE)) {
295		/*
296		 * Process has trashed its stack; give it an illegal
297		 * instruction to halt it in its tracks.
298		 */
299		PROC_LOCK(p);
300		SIGACTION(p, SIGILL) = SIG_DFL;
301		SIGDELSET(p->p_sigignore, SIGILL);
302		SIGDELSET(p->p_sigcatch, SIGILL);
303		SIGDELSET(p->p_sigmask, SIGILL);
304#ifdef DEBUG
305		if (ldebug(rt_sendsig))
306			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
307			    fp, oonstack);
308#endif
309		psignal(p, SIGILL);
310		return;
311	}
312
313	/*
314	 * Build the argument list for the signal handler.
315	 */
316	if (p->p_sysent->sv_sigtbl)
317		if (sig <= p->p_sysent->sv_sigsize)
318			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
319
320	frame.sf_handler = catcher;
321	frame.sf_sig = sig;
322	frame.sf_siginfo = &fp->sf_si;
323	frame.sf_ucontext = &fp->sf_sc;
324
325	/* Fill siginfo structure. */
326	frame.sf_si.lsi_signo = sig;
327	frame.sf_si.lsi_code = code;
328	frame.sf_si.lsi_addr = (void *)regs->tf_err;
329
330	/*
331	 * Build the signal context to be used by sigreturn.
332	 */
333	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
334	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
335
336	PROC_LOCK(p);
337	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
338	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
339	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
340	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
341	PROC_UNLOCK(p);
342
343	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
344
345	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
346	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
347	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
348	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
349	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
350	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
351	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
352	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
353	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
354	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
355	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
356	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
357	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
358	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
359	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
360	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
361	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
362	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
363	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
364
365#ifdef DEBUG
366	if (ldebug(rt_sendsig))
367		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
368		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
369		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
370#endif
371
372	if (copyout(&frame, fp, sizeof(frame)) != 0) {
373		/*
374		 * Process has trashed its stack; give it an illegal
375		 * instruction to halt it in its tracks.
376		 */
377		PROC_LOCK(p);
378		sigexit(td, SIGILL);
379		/* NOTREACHED */
380	}
381
382	/*
383	 * Build context to run handler in.
384	 */
385	regs->tf_esp = (int)fp;
386	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
387	    linux_sznonrtsigcode;
388	regs->tf_eflags &= ~PSL_VM;
389	regs->tf_cs = _ucodesel;
390	regs->tf_ds = _udatasel;
391	regs->tf_es = _udatasel;
392	regs->tf_fs = _udatasel;
393	regs->tf_ss = _udatasel;
394	PROC_LOCK(p);
395}
396
397
398/*
399 * Send an interrupt to process.
400 *
401 * Stack is set up to allow sigcode stored
402 * in u. to call routine, followed by kcall
403 * to sigreturn routine below.  After sigreturn
404 * resets the signal mask, the stack, and the
405 * frame pointer, it returns to the user
406 * specified pc, psl.
407 */
408
409static void
410linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
411{
412	register struct thread *td = curthread;
413	register struct proc *p = td->td_proc;
414	register struct trapframe *regs;
415	struct l_sigframe *fp, frame;
416	l_sigset_t lmask;
417	int oonstack, i;
418
419	PROC_LOCK_ASSERT(p, MA_OWNED);
420	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
421		/* Signal handler installed with SA_SIGINFO. */
422		linux_rt_sendsig(catcher, sig, mask, code);
423		return;
424	}
425
426	regs = td->td_frame;
427	oonstack = sigonstack(regs->tf_esp);
428
429#ifdef DEBUG
430	if (ldebug(sendsig))
431		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
432		    catcher, sig, (void*)mask, code);
433#endif
434
435	/*
436	 * Allocate space for the signal handler context.
437	 */
438	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
439	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
440		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
441		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
442	} else
443		fp = (struct l_sigframe *)regs->tf_esp - 1;
444	PROC_UNLOCK(p);
445
446	/*
447	 * grow() will return FALSE if the fp will not fit inside the stack
448	 *	and the stack can not be grown. useracc will return FALSE
449	 *	if access is denied.
450	 */
451	if ((grow_stack (p, (int)fp) == FALSE) ||
452	    !useracc((caddr_t)fp, sizeof (struct l_sigframe),
453	    VM_PROT_WRITE)) {
454		/*
455		 * Process has trashed its stack; give it an illegal
456		 * instruction to halt it in its tracks.
457		 */
458		PROC_LOCK(p);
459		SIGACTION(p, SIGILL) = SIG_DFL;
460		SIGDELSET(p->p_sigignore, SIGILL);
461		SIGDELSET(p->p_sigcatch, SIGILL);
462		SIGDELSET(p->p_sigmask, SIGILL);
463		psignal(p, SIGILL);
464		return;
465	}
466
467	/*
468	 * Build the argument list for the signal handler.
469	 */
470	if (p->p_sysent->sv_sigtbl)
471		if (sig <= p->p_sysent->sv_sigsize)
472			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
473
474	frame.sf_handler = catcher;
475	frame.sf_sig = sig;
476
477	bsd_to_linux_sigset(mask, &lmask);
478
479	/*
480	 * Build the signal context to be used by sigreturn.
481	 */
482	frame.sf_sc.sc_mask   = lmask.__bits[0];
483	frame.sf_sc.sc_gs     = rgs();
484	frame.sf_sc.sc_fs     = regs->tf_fs;
485	frame.sf_sc.sc_es     = regs->tf_es;
486	frame.sf_sc.sc_ds     = regs->tf_ds;
487	frame.sf_sc.sc_edi    = regs->tf_edi;
488	frame.sf_sc.sc_esi    = regs->tf_esi;
489	frame.sf_sc.sc_ebp    = regs->tf_ebp;
490	frame.sf_sc.sc_ebx    = regs->tf_ebx;
491	frame.sf_sc.sc_edx    = regs->tf_edx;
492	frame.sf_sc.sc_ecx    = regs->tf_ecx;
493	frame.sf_sc.sc_eax    = regs->tf_eax;
494	frame.sf_sc.sc_eip    = regs->tf_eip;
495	frame.sf_sc.sc_cs     = regs->tf_cs;
496	frame.sf_sc.sc_eflags = regs->tf_eflags;
497	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
498	frame.sf_sc.sc_ss     = regs->tf_ss;
499	frame.sf_sc.sc_err    = regs->tf_err;
500	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
501
502	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
503
504	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
505		frame.sf_extramask[i] = lmask.__bits[i+1];
506
507	if (copyout(&frame, fp, sizeof(frame)) != 0) {
508		/*
509		 * Process has trashed its stack; give it an illegal
510		 * instruction to halt it in its tracks.
511		 */
512		PROC_LOCK(p);
513		sigexit(td, SIGILL);
514		/* NOTREACHED */
515	}
516
517	/*
518	 * Build context to run handler in.
519	 */
520	regs->tf_esp = (int)fp;
521	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
522	regs->tf_eflags &= ~PSL_VM;
523	regs->tf_cs = _ucodesel;
524	regs->tf_ds = _udatasel;
525	regs->tf_es = _udatasel;
526	regs->tf_fs = _udatasel;
527	regs->tf_ss = _udatasel;
528	PROC_LOCK(p);
529}
530
531/*
532 * System call to cleanup state after a signal
533 * has been taken.  Reset signal mask and
534 * stack state from context left by sendsig (above).
535 * Return to previous pc and psl as specified by
536 * context left by sendsig. Check carefully to
537 * make sure that the user has not modified the
538 * psl to gain improper privileges or to cause
539 * a machine fault.
540 */
541int
542linux_sigreturn(td, args)
543	struct thread *td;
544	struct linux_sigreturn_args *args;
545{
546	struct proc *p = td->td_proc;
547	struct l_sigframe frame;
548	register struct trapframe *regs;
549	l_sigset_t lmask;
550	int eflags, i;
551
552	regs = td->td_frame;
553
554#ifdef DEBUG
555	if (ldebug(sigreturn))
556		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
557#endif
558	/*
559	 * The trampoline code hands us the sigframe.
560	 * It is unsafe to keep track of it ourselves, in the event that a
561	 * program jumps out of a signal handler.
562	 */
563	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
564		return (EFAULT);
565
566	/*
567	 * Check for security violations.
568	 */
569#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
570	eflags = frame.sf_sc.sc_eflags;
571	/*
572	 * XXX do allow users to change the privileged flag PSL_RF.  The
573	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
574	 * sometimes set it there too.  tf_eflags is kept in the signal
575	 * context during signal handling and there is no other place
576	 * to remember it, so the PSL_RF bit may be corrupted by the
577	 * signal handler without us knowing.  Corruption of the PSL_RF
578	 * bit at worst causes one more or one less debugger trap, so
579	 * allowing it is fairly harmless.
580	 */
581	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
582    		return(EINVAL);
583	}
584
585	/*
586	 * Don't allow users to load a valid privileged %cs.  Let the
587	 * hardware check for invalid selectors, excess privilege in
588	 * other selectors, invalid %eip's and invalid %esp's.
589	 */
590#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
591	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
592		trapsignal(p, SIGBUS, T_PROTFLT);
593		return(EINVAL);
594	}
595
596	lmask.__bits[0] = frame.sf_sc.sc_mask;
597	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
598		lmask.__bits[i+1] = frame.sf_extramask[i];
599	PROC_LOCK(p);
600	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
601	SIG_CANTMASK(p->p_sigmask);
602	PROC_UNLOCK(p);
603
604	/*
605	 * Restore signal context.
606	 */
607	/* %gs was restored by the trampoline. */
608	regs->tf_fs     = frame.sf_sc.sc_fs;
609	regs->tf_es     = frame.sf_sc.sc_es;
610	regs->tf_ds     = frame.sf_sc.sc_ds;
611	regs->tf_edi    = frame.sf_sc.sc_edi;
612	regs->tf_esi    = frame.sf_sc.sc_esi;
613	regs->tf_ebp    = frame.sf_sc.sc_ebp;
614	regs->tf_ebx    = frame.sf_sc.sc_ebx;
615	regs->tf_edx    = frame.sf_sc.sc_edx;
616	regs->tf_ecx    = frame.sf_sc.sc_ecx;
617	regs->tf_eax    = frame.sf_sc.sc_eax;
618	regs->tf_eip    = frame.sf_sc.sc_eip;
619	regs->tf_cs     = frame.sf_sc.sc_cs;
620	regs->tf_eflags = eflags;
621	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
622	regs->tf_ss     = frame.sf_sc.sc_ss;
623
624	return (EJUSTRETURN);
625}
626
627/*
628 * System call to cleanup state after a signal
629 * has been taken.  Reset signal mask and
630 * stack state from context left by rt_sendsig (above).
631 * Return to previous pc and psl as specified by
632 * context left by sendsig. Check carefully to
633 * make sure that the user has not modified the
634 * psl to gain improper privileges or to cause
635 * a machine fault.
636 */
637int
638linux_rt_sigreturn(td, args)
639	struct thread *td;
640	struct linux_rt_sigreturn_args *args;
641{
642	struct proc *p = td->td_proc;
643	struct sigaltstack_args sasargs;
644	struct l_ucontext uc;
645	struct l_sigcontext *context;
646	l_stack_t *lss;
647	stack_t *ss;
648	register struct trapframe *regs;
649	int eflags;
650	caddr_t sg = stackgap_init();
651
652	regs = td->td_frame;
653
654#ifdef DEBUG
655	if (ldebug(rt_sigreturn))
656		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
657#endif
658	/*
659	 * The trampoline code hands us the ucontext.
660	 * It is unsafe to keep track of it ourselves, in the event that a
661	 * program jumps out of a signal handler.
662	 */
663	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
664		return (EFAULT);
665
666	context = &uc.uc_mcontext;
667
668	/*
669	 * Check for security violations.
670	 */
671#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
672	eflags = context->sc_eflags;
673	/*
674	 * XXX do allow users to change the privileged flag PSL_RF.  The
675	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
676	 * sometimes set it there too.  tf_eflags is kept in the signal
677	 * context during signal handling and there is no other place
678	 * to remember it, so the PSL_RF bit may be corrupted by the
679	 * signal handler without us knowing.  Corruption of the PSL_RF
680	 * bit at worst causes one more or one less debugger trap, so
681	 * allowing it is fairly harmless.
682	 */
683	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
684    		return(EINVAL);
685	}
686
687	/*
688	 * Don't allow users to load a valid privileged %cs.  Let the
689	 * hardware check for invalid selectors, excess privilege in
690	 * other selectors, invalid %eip's and invalid %esp's.
691	 */
692#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
693	if (!CS_SECURE(context->sc_cs)) {
694		trapsignal(p, SIGBUS, T_PROTFLT);
695		return(EINVAL);
696	}
697
698	PROC_LOCK(p);
699	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
700	SIG_CANTMASK(p->p_sigmask);
701	PROC_UNLOCK(p);
702
703	/*
704	 * Restore signal context
705	 */
706	/* %gs was restored by the trampoline. */
707	regs->tf_fs     = context->sc_fs;
708	regs->tf_es     = context->sc_es;
709	regs->tf_ds     = context->sc_ds;
710	regs->tf_edi    = context->sc_edi;
711	regs->tf_esi    = context->sc_esi;
712	regs->tf_ebp    = context->sc_ebp;
713	regs->tf_ebx    = context->sc_ebx;
714	regs->tf_edx    = context->sc_edx;
715	regs->tf_ecx    = context->sc_ecx;
716	regs->tf_eax    = context->sc_eax;
717	regs->tf_eip    = context->sc_eip;
718	regs->tf_cs     = context->sc_cs;
719	regs->tf_eflags = eflags;
720	regs->tf_esp    = context->sc_esp_at_signal;
721	regs->tf_ss     = context->sc_ss;
722
723	/*
724	 * call sigaltstack & ignore results..
725	 */
726	ss = stackgap_alloc(&sg, sizeof(stack_t));
727	lss = &uc.uc_stack;
728	ss->ss_sp = lss->ss_sp;
729	ss->ss_size = lss->ss_size;
730	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
731
732#ifdef DEBUG
733	if (ldebug(rt_sigreturn))
734		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
735		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
736#endif
737	sasargs.ss = ss;
738	sasargs.oss = NULL;
739	(void) sigaltstack(td, &sasargs);
740
741	return (EJUSTRETURN);
742}
743
744/*
745 * MPSAFE
746 */
747static void
748linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
749{
750	args[0] = tf->tf_ebx;
751	args[1] = tf->tf_ecx;
752	args[2] = tf->tf_edx;
753	args[3] = tf->tf_esi;
754	args[4] = tf->tf_edi;
755	*params = NULL;		/* no copyin */
756}
757
758/*
759 * If a linux binary is exec'ing something, try this image activator
760 * first.  We override standard shell script execution in order to
761 * be able to modify the interpreter path.  We only do this if a linux
762 * binary is doing the exec, so we do not create an EXEC module for it.
763 */
764static int	exec_linux_imgact_try __P((struct image_params *iparams));
765
766static int
767exec_linux_imgact_try(imgp)
768    struct image_params *imgp;
769{
770    const char *head = (const char *)imgp->image_header;
771    int error = -1;
772
773    /*
774     * The interpreter for shell scripts run from a linux binary needs
775     * to be located in /compat/linux if possible in order to recursively
776     * maintain linux path emulation.
777     */
778    if (((const short *)head)[0] == SHELLMAGIC) {
779	    /*
780	     * Run our normal shell image activator.  If it succeeds attempt
781	     * to use the alternate path for the interpreter.  If an alternate
782	     * path is found, use our stringspace to store it.
783	     */
784	    if ((error = exec_shell_imgact(imgp)) == 0) {
785		    char *rpath = NULL;
786
787		    linux_emul_find(&imgp->proc->p_thread, NULL,
788			imgp->interpreter_name, &rpath, 0);
789		    if (rpath != imgp->interpreter_name) {
790			    int len = strlen(rpath) + 1;
791
792			    if (len <= MAXSHELLCMDLEN) {
793				    memcpy(imgp->interpreter_name, rpath, len);
794			    }
795			    free(rpath, M_TEMP);
796		    }
797	    }
798    }
799    return(error);
800}
801
802struct sysentvec linux_sysvec = {
803	LINUX_SYS_MAXSYSCALL,
804	linux_sysent,
805	0xff,
806	LINUX_SIGTBLSZ,
807	bsd_to_linux_signal,
808	ELAST + 1,
809	bsd_to_linux_errno,
810	translate_traps,
811	linux_fixup,
812	linux_sendsig,
813	linux_sigcode,
814	&linux_szsigcode,
815	linux_prepsyscall,
816	"Linux a.out",
817	aout_coredump,
818	exec_linux_imgact_try,
819	LINUX_MINSIGSTKSZ
820};
821
822struct sysentvec elf_linux_sysvec = {
823	LINUX_SYS_MAXSYSCALL,
824	linux_sysent,
825	0xff,
826	LINUX_SIGTBLSZ,
827	bsd_to_linux_signal,
828	ELAST + 1,
829	bsd_to_linux_errno,
830	translate_traps,
831	elf_linux_fixup,
832	linux_sendsig,
833	linux_sigcode,
834	&linux_szsigcode,
835	linux_prepsyscall,
836	"Linux ELF",
837	elf_coredump,
838	exec_linux_imgact_try,
839	LINUX_MINSIGSTKSZ
840};
841
842static Elf32_Brandinfo linux_brand = {
843					ELFOSABI_LINUX,
844					"Linux",
845					"/compat/linux",
846					"/lib/ld-linux.so.1",
847					&elf_linux_sysvec
848				 };
849
850static Elf32_Brandinfo linux_glibc2brand = {
851					ELFOSABI_LINUX,
852					"Linux",
853					"/compat/linux",
854					"/lib/ld-linux.so.2",
855					&elf_linux_sysvec
856				 };
857
858Elf32_Brandinfo *linux_brandlist[] = {
859					&linux_brand,
860					&linux_glibc2brand,
861					NULL
862				};
863
864static int
865linux_elf_modevent(module_t mod, int type, void *data)
866{
867	Elf32_Brandinfo **brandinfo;
868	int error;
869	struct linux_ioctl_handler **lihp;
870
871	error = 0;
872
873	switch(type) {
874	case MOD_LOAD:
875		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
876		     ++brandinfo)
877			if (elf_insert_brand_entry(*brandinfo) < 0)
878				error = EINVAL;
879		if (error == 0) {
880			SET_FOREACH(lihp, linux_ioctl_handler_set)
881				linux_ioctl_register_handler(*lihp);
882			if (bootverbose)
883				printf("Linux ELF exec handler installed\n");
884		} else
885			printf("cannot insert Linux ELF brand handler\n");
886		break;
887	case MOD_UNLOAD:
888		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
889		     ++brandinfo)
890			if (elf_brand_inuse(*brandinfo))
891				error = EBUSY;
892		if (error == 0) {
893			for (brandinfo = &linux_brandlist[0];
894			     *brandinfo != NULL; ++brandinfo)
895				if (elf_remove_brand_entry(*brandinfo) < 0)
896					error = EINVAL;
897		}
898		if (error == 0) {
899			SET_FOREACH(lihp, linux_ioctl_handler_set)
900				linux_ioctl_unregister_handler(*lihp);
901			if (bootverbose)
902				printf("Linux ELF exec handler removed\n");
903		} else
904			printf("Could not deinstall ELF interpreter entry\n");
905		break;
906	default:
907		break;
908	}
909	return error;
910}
911
912static moduledata_t linux_elf_mod = {
913	"linuxelf",
914	linux_elf_modevent,
915	0
916};
917
918DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
919