linux_sysvec.c revision 68520
1/*-
 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 68520 2000-11-09 08:25:48Z marcel $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/proc.h>
41#include <sys/sysproto.h>
42#include <sys/sysent.h>
43#include <sys/imgact.h>
44#include <sys/imgact_aout.h>
45#include <sys/imgact_elf.h>
46#include <sys/signalvar.h>
47#include <sys/malloc.h>
48#include <vm/vm.h>
49#include <vm/vm_param.h>
50#include <vm/vm_page.h>
51#include <vm/vm_extern.h>
52#include <sys/exec.h>
53#include <sys/kernel.h>
54#include <sys/module.h>
55#include <machine/cpu.h>
56
57#include <i386/linux/linux.h>
58#include <linux_proto.h>
59#include <compat/linux/linux_util.h>
60
61MODULE_VERSION(linux, 1);
62
63MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
64
65#if BYTE_ORDER == LITTLE_ENDIAN
66#define SHELLMAGIC      0x2123 /* #! */
67#else
68#define SHELLMAGIC      0x2321
69#endif
70
71extern char linux_sigcode[];
72extern int linux_szsigcode;
73
74extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
75
76extern struct linker_set linux_ioctl_handler_set;
77
78static int	linux_fixup __P((register_t **stack_base,
79				 struct image_params *iparams));
80static int	elf_linux_fixup __P((register_t **stack_base,
81				     struct image_params *iparams));
82static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
83				       u_int *code, caddr_t *params));
84static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
85				   u_long code));
86
87/*
88 * Linux syscalls return negative errno's, we do positive and map them
89 */
90static int bsd_to_linux_errno[ELAST + 1] = {
91  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
92 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
93 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
94 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
95 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
96	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
97	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
98	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
99  	-6, -6, -43, -42, -75, -6, -84
100};
101
102int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
103	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
104	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
105	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
106	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
107	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
108	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
109	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
110	0, LINUX_SIGUSR1, LINUX_SIGUSR2
111};
112
113int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
114	SIGHUP, SIGINT, SIGQUIT, SIGILL,
115	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
116	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
117	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
118	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
119	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
120	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
121	SIGIO, SIGURG, 0
122};
123
124/*
125 * If FreeBSD & Linux have a difference of opinion about what a trap
126 * means, deal with it here.
127 */
128static int
129translate_traps(int signal, int trap_code)
130{
131	if (signal != SIGBUS)
132		return signal;
133	switch (trap_code) {
134	case T_PROTFLT:
135	case T_TSSFLT:
136	case T_DOUBLEFLT:
137	case T_PAGEFLT:
138		return SIGSEGV;
139	default:
140		return signal;
141	}
142}
143
144static int
145linux_fixup(register_t **stack_base, struct image_params *imgp)
146{
147	register_t *argv, *envp;
148
149	argv = *stack_base;
150	envp = *stack_base + (imgp->argc + 1);
151	(*stack_base)--;
152	**stack_base = (intptr_t)(void *)envp;
153	(*stack_base)--;
154	**stack_base = (intptr_t)(void *)argv;
155	(*stack_base)--;
156	**stack_base = imgp->argc;
157	return 0;
158}
159
160static int
161elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
162{
163	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
164	register_t *pos;
165
166	pos = *stack_base + (imgp->argc + imgp->envc + 2);
167
168	if (args->trace) {
169		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
170	}
171	if (args->execfd != -1) {
172		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
173	}
174	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
175	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
176	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
177	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
178	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
179	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
180	AUXARGS_ENTRY(pos, AT_BASE, args->base);
181	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
182	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
183	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
184	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
185	AUXARGS_ENTRY(pos, AT_NULL, 0);
186
187	free(imgp->auxargs, M_TEMP);
188	imgp->auxargs = NULL;
189
190	(*stack_base)--;
191	**stack_base = (long)imgp->argc;
192	return 0;
193}
194
195extern int _ucodesel, _udatasel;
196extern unsigned long _linux_sznonrtsigcode;
197
198static void
199linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
200{
201	register struct proc *p = curproc;
202	register struct trapframe *regs;
203	struct linux_rt_sigframe *fp, frame;
204	struct sigacts *psp = p->p_sigacts;
205	int oonstack;
206
207	regs = p->p_md.md_regs;
208	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
209
210#ifdef DEBUG
211	printf("Linux-emul(%ld): linux_rt_sendsig(%p, %d, %p, %lu)\n",
212	    (long)p->p_pid, catcher, sig, (void*)mask, code);
213#endif
214	/*
215	 * Allocate space for the signal handler context.
216	 */
217	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
218	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
219		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
220		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
221		p->p_sigstk.ss_flags |= SS_ONSTACK;
222	} else {
223		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
224	}
225
226	/*
227	 * grow() will return FALSE if the fp will not fit inside the stack
228	 *	and the stack can not be grown. useracc will return FALSE
229	 *	if access is denied.
230	 */
231	if ((grow_stack (p, (int)fp) == FALSE) ||
232	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
233	    VM_PROT_WRITE)) {
234		/*
235		 * Process has trashed its stack; give it an illegal
236		 * instruction to halt it in its tracks.
237		 */
238		SIGACTION(p, SIGILL) = SIG_DFL;
239		SIGDELSET(p->p_sigignore, SIGILL);
240		SIGDELSET(p->p_sigcatch, SIGILL);
241		SIGDELSET(p->p_sigmask, SIGILL);
242#ifdef DEBUG
243		printf("Linux-emul(%ld): linux_rt_sendsig -- bad stack %p, SS_ONSTACK: 0x%x ",
244	    (long)p->p_pid, fp, p->p_sigstk.ss_flags & SS_ONSTACK);
245#endif
246		psignal(p, SIGILL);
247		return;
248	}
249
250	/*
251	 * Build the argument list for the signal handler.
252	 */
253	if (p->p_sysent->sv_sigtbl)
254		if (sig <= p->p_sysent->sv_sigsize)
255			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
256
257	frame.sf_handler = catcher;
258	frame.sf_sig = sig;
259
260	frame.sf_siginfo = &fp->sf_si;
261	frame.sf_ucontext = &fp->sf_sc;
262	/* Fill siginfo structure. */
263	frame.sf_si.lsi_signo = sig;
264	frame.sf_si.lsi_code = code;
265	frame.sf_si.lsi_addr = (void *)regs->tf_err;
266	/*
267	 * Build the signal context to be used by sigreturn.
268	 */
269	frame.sf_sc.uc_mcontext.sc_mask   = mask->__bits[0];
270	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
271	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
272	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
273	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
274	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
275	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
276	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
277	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
278	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
279	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
280	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
281	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
282	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
283	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
284	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
285	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
286	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
287	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
288
289	/*
290	 * Build the remainder of the ucontext struct to be used by sigreturn.
291	 */
292	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
293	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
294	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
295	frame.sf_sc.uc_stack.ss_flags =
296	    bsd_to_linux_sigaltstack(p->p_sigstk.ss_flags);
297	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
298#ifdef DEBUG
299	printf("Linux-emul(%ld): rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
300	    (long)p->p_pid, frame.sf_sc.uc_stack.ss_flags,  p->p_sigstk.ss_sp,
301	    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
302#endif
303	bsd_to_linux_sigset(&p->p_sigmask, &frame.sf_sc.uc_sigmask);
304
305	if (copyout(&frame, fp, sizeof(frame)) != 0) {
306		/*
307		 * Process has trashed its stack; give it an illegal
308		 * instruction to halt it in its tracks.
309		 */
310		sigexit(p, SIGILL);
311		/* NOTREACHED */
312	}
313
314	/*
315	 * Build context to run handler in.
316	 */
317	regs->tf_esp = (int)fp;
318	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
319	    _linux_sznonrtsigcode;
320	regs->tf_eflags &= ~PSL_VM;
321	regs->tf_cs = _ucodesel;
322	regs->tf_ds = _udatasel;
323	regs->tf_es = _udatasel;
324	regs->tf_fs = _udatasel;
325	load_gs(_udatasel);
326	regs->tf_ss = _udatasel;
327}
328
329
330/*
331 * Send an interrupt to process.
332 *
333 * Stack is set up to allow sigcode stored
334 * in u. to call routine, followed by kcall
335 * to sigreturn routine below.  After sigreturn
336 * resets the signal mask, the stack, and the
337 * frame pointer, it returns to the user
338 * specified pc, psl.
339 */
340
341static void
342linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
343{
344	register struct proc *p = curproc;
345	register struct trapframe *regs;
346	struct linux_sigframe *fp, frame;
347	struct sigacts *psp = p->p_sigacts;
348	int oonstack;
349
350	regs = p->p_md.md_regs;
351	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
352
353#ifdef DEBUG
354	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
355	    (long)p->p_pid, catcher, sig, (void*)mask, code);
356#endif
357
358	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
359		/* Signal handler installed with SA_SIGINFO. */
360		linux_rt_sendsig(catcher, sig, mask, code);
361		return;
362	}
363
364	/*
365	 * Allocate space for the signal handler context.
366	 */
367	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
368	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
369		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
370		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
371		p->p_sigstk.ss_flags |= SS_ONSTACK;
372	} else {
373		fp = (struct linux_sigframe *)regs->tf_esp - 1;
374	}
375
376	/*
377	 * grow() will return FALSE if the fp will not fit inside the stack
378	 *	and the stack can not be grown. useracc will return FALSE
379	 *	if access is denied.
380	 */
381	if ((grow_stack (p, (int)fp) == FALSE) ||
382	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
383	    VM_PROT_WRITE)) {
384		/*
385		 * Process has trashed its stack; give it an illegal
386		 * instruction to halt it in its tracks.
387		 */
388		SIGACTION(p, SIGILL) = SIG_DFL;
389		SIGDELSET(p->p_sigignore, SIGILL);
390		SIGDELSET(p->p_sigcatch, SIGILL);
391		SIGDELSET(p->p_sigmask, SIGILL);
392		psignal(p, SIGILL);
393		return;
394	}
395
396	/*
397	 * Build the argument list for the signal handler.
398	 */
399	if (p->p_sysent->sv_sigtbl)
400		if (sig <= p->p_sysent->sv_sigsize)
401			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
402
403	frame.sf_handler = catcher;
404	frame.sf_sig = sig;
405
406	/*
407	 * Build the signal context to be used by sigreturn.
408	 */
409	frame.sf_sc.sc_mask   = mask->__bits[0];
410	frame.sf_sc.sc_gs     = rgs();
411	frame.sf_sc.sc_fs     = regs->tf_fs;
412	frame.sf_sc.sc_es     = regs->tf_es;
413	frame.sf_sc.sc_ds     = regs->tf_ds;
414	frame.sf_sc.sc_edi    = regs->tf_edi;
415	frame.sf_sc.sc_esi    = regs->tf_esi;
416	frame.sf_sc.sc_ebp    = regs->tf_ebp;
417	frame.sf_sc.sc_ebx    = regs->tf_ebx;
418	frame.sf_sc.sc_edx    = regs->tf_edx;
419	frame.sf_sc.sc_ecx    = regs->tf_ecx;
420	frame.sf_sc.sc_eax    = regs->tf_eax;
421	frame.sf_sc.sc_eip    = regs->tf_eip;
422	frame.sf_sc.sc_cs     = regs->tf_cs;
423	frame.sf_sc.sc_eflags = regs->tf_eflags;
424	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
425	frame.sf_sc.sc_ss     = regs->tf_ss;
426	frame.sf_sc.sc_err    = regs->tf_err;
427	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
428
429	if (copyout(&frame, fp, sizeof(frame)) != 0) {
430		/*
431		 * Process has trashed its stack; give it an illegal
432		 * instruction to halt it in its tracks.
433		 */
434		sigexit(p, SIGILL);
435		/* NOTREACHED */
436	}
437
438	/*
439	 * Build context to run handler in.
440	 */
441	regs->tf_esp = (int)fp;
442	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
443	regs->tf_eflags &= ~PSL_VM;
444	regs->tf_cs = _ucodesel;
445	regs->tf_ds = _udatasel;
446	regs->tf_es = _udatasel;
447	regs->tf_fs = _udatasel;
448	load_gs(_udatasel);
449	regs->tf_ss = _udatasel;
450}
451
452/*
453 * System call to cleanup state after a signal
454 * has been taken.  Reset signal mask and
455 * stack state from context left by sendsig (above).
456 * Return to previous pc and psl as specified by
457 * context left by sendsig. Check carefully to
458 * make sure that the user has not modified the
459 * psl to gain improper privileges or to cause
460 * a machine fault.
461 */
462int
463linux_sigreturn(p, args)
464	struct proc *p;
465	struct linux_sigreturn_args *args;
466{
467	struct linux_sigcontext context;
468	register struct trapframe *regs;
469	int eflags;
470
471	regs = p->p_md.md_regs;
472
473#ifdef DEBUG
474	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
475	    (long)p->p_pid, (void *)args->scp);
476#endif
477	/*
478	 * The trampoline code hands us the context.
479	 * It is unsafe to keep track of it ourselves, in the event that a
480	 * program jumps out of a signal handler.
481	 */
482	if (copyin((caddr_t)args->scp, &context, sizeof(context)) != 0)
483		return (EFAULT);
484
485	/*
486	 * Check for security violations.
487	 */
488#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
489	eflags = context.sc_eflags;
490	/*
491	 * XXX do allow users to change the privileged flag PSL_RF.  The
492	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
493	 * sometimes set it there too.  tf_eflags is kept in the signal
494	 * context during signal handling and there is no other place
495	 * to remember it, so the PSL_RF bit may be corrupted by the
496	 * signal handler without us knowing.  Corruption of the PSL_RF
497	 * bit at worst causes one more or one less debugger trap, so
498	 * allowing it is fairly harmless.
499	 */
500	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
501    		return(EINVAL);
502	}
503
504	/*
505	 * Don't allow users to load a valid privileged %cs.  Let the
506	 * hardware check for invalid selectors, excess privilege in
507	 * other selectors, invalid %eip's and invalid %esp's.
508	 */
509#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
510	if (!CS_SECURE(context.sc_cs)) {
511		trapsignal(p, SIGBUS, T_PROTFLT);
512		return(EINVAL);
513	}
514
515	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
516	SIGSETOLD(p->p_sigmask, context.sc_mask);
517	SIG_CANTMASK(p->p_sigmask);
518
519	/*
520	 * Restore signal context.
521	 */
522	/* %gs was restored by the trampoline. */
523	regs->tf_fs     = context.sc_fs;
524	regs->tf_es     = context.sc_es;
525	regs->tf_ds     = context.sc_ds;
526	regs->tf_edi    = context.sc_edi;
527	regs->tf_esi    = context.sc_esi;
528	regs->tf_ebp    = context.sc_ebp;
529	regs->tf_ebx    = context.sc_ebx;
530	regs->tf_edx    = context.sc_edx;
531	regs->tf_ecx    = context.sc_ecx;
532	regs->tf_eax    = context.sc_eax;
533	regs->tf_eip    = context.sc_eip;
534	regs->tf_cs     = context.sc_cs;
535	regs->tf_eflags = eflags;
536	regs->tf_esp    = context.sc_esp_at_signal;
537	regs->tf_ss     = context.sc_ss;
538
539	return (EJUSTRETURN);
540}
541
542/*
543 * System call to cleanup state after a signal
544 * has been taken.  Reset signal mask and
545 * stack state from context left by rt_sendsig (above).
546 * Return to previous pc and psl as specified by
547 * context left by sendsig. Check carefully to
548 * make sure that the user has not modified the
549 * psl to gain improper privileges or to cause
550 * a machine fault.
551 */
552int
553linux_rt_sigreturn(p, args)
554	struct proc *p;
555	struct linux_rt_sigreturn_args *args;
556{
557	struct sigaltstack_args sasargs;
558	struct linux_ucontext 	 uc;
559	struct linux_sigcontext *context;
560	linux_stack_t *lss;
561	stack_t *ss;
562	register struct trapframe *regs;
563	int eflags;
564	caddr_t sg = stackgap_init();
565
566	regs = p->p_md.md_regs;
567
568#ifdef DEBUG
569	printf("Linux-emul(%ld): linux_rt_sigreturn(%p)\n",
570	    (long)p->p_pid, (void *)args->ucp);
571#endif
572	/*
573	 * The trampoline code hands us the u_context.
574	 * It is unsafe to keep track of it ourselves, in the event that a
575	 * program jumps out of a signal handler.
576	 */
577	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
578		return (EFAULT);
579
580	context = &uc.uc_mcontext;
581
582	/*
583	 * Check for security violations.
584	 */
585#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
586	eflags = context->sc_eflags;
587	/*
588	 * XXX do allow users to change the privileged flag PSL_RF.  The
589	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
590	 * sometimes set it there too.  tf_eflags is kept in the signal
591	 * context during signal handling and there is no other place
592	 * to remember it, so the PSL_RF bit may be corrupted by the
593	 * signal handler without us knowing.  Corruption of the PSL_RF
594	 * bit at worst causes one more or one less debugger trap, so
595	 * allowing it is fairly harmless.
596	 */
597	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
598    		return(EINVAL);
599	}
600
601	/*
602	 * Don't allow users to load a valid privileged %cs.  Let the
603	 * hardware check for invalid selectors, excess privilege in
604	 * other selectors, invalid %eip's and invalid %esp's.
605	 */
606#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
607	if (!CS_SECURE(context->sc_cs)) {
608		trapsignal(p, SIGBUS, T_PROTFLT);
609		return(EINVAL);
610	}
611
612	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
613	SIGSETOLD(p->p_sigmask, context->sc_mask);
614	SIG_CANTMASK(p->p_sigmask);
615
616	/*
617	 * Restore signal context->
618	 */
619	/* %gs was restored by the trampoline. */
620	regs->tf_fs     = context->sc_fs;
621	regs->tf_es     = context->sc_es;
622	regs->tf_ds     = context->sc_ds;
623	regs->tf_edi    = context->sc_edi;
624	regs->tf_esi    = context->sc_esi;
625	regs->tf_ebp    = context->sc_ebp;
626	regs->tf_ebx    = context->sc_ebx;
627	regs->tf_edx    = context->sc_edx;
628	regs->tf_ecx    = context->sc_ecx;
629	regs->tf_eax    = context->sc_eax;
630	regs->tf_eip    = context->sc_eip;
631	regs->tf_cs     = context->sc_cs;
632	regs->tf_eflags = eflags;
633	regs->tf_esp    = context->sc_esp_at_signal;
634	regs->tf_ss     = context->sc_ss;
635
636	/*
637	 * call sigaltstack & ignore results..
638	 */
639	ss = stackgap_alloc(&sg, sizeof(stack_t));
640	lss = &uc.uc_stack;
641	ss->ss_sp = lss->ss_sp;
642	ss->ss_size = lss->ss_size;
643	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
644
645#ifdef DEBUG
646	printf("Linux-emul(%ld): rt_sigret  flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
647	    (long)p->p_pid, ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
648#endif
649	sasargs.ss = ss;
650	sasargs.oss = NULL;
651	(void) sigaltstack(p, &sasargs);
652
653	return (EJUSTRETURN);
654}
655
656static void
657linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
658{
659	args[0] = tf->tf_ebx;
660	args[1] = tf->tf_ecx;
661	args[2] = tf->tf_edx;
662	args[3] = tf->tf_esi;
663	args[4] = tf->tf_edi;
664	*params = NULL;		/* no copyin */
665}
666
667/*
668 * If a linux binary is exec'ing something, try this image activator
669 * first.  We override standard shell script execution in order to
670 * be able to modify the interpreter path.  We only do this if a linux
671 * binary is doing the exec, so we do not create an EXEC module for it.
672 */
673static int	exec_linux_imgact_try __P((struct image_params *iparams));
674
675static int
676exec_linux_imgact_try(imgp)
677    struct image_params *imgp;
678{
679    const char *head = (const char *)imgp->image_header;
680    int error = -1;
681
682    /*
683     * The interpreter for shell scripts run from a linux binary needs
684     * to be located in /compat/linux if possible in order to recursively
685     * maintain linux path emulation.
686     */
687    if (((const short *)head)[0] == SHELLMAGIC) {
688	    /*
689	     * Run our normal shell image activator.  If it succeeds attempt
690	     * to use the alternate path for the interpreter.  If an alternate
691	     * path is found, use our stringspace to store it.
692	     */
693	    if ((error = exec_shell_imgact(imgp)) == 0) {
694		    char *rpath = NULL;
695
696		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
697			imgp->interpreter_name, &rpath, 0);
698		    if (rpath != imgp->interpreter_name) {
699			    int len = strlen(rpath) + 1;
700
701			    if (len <= MAXSHELLCMDLEN) {
702				memcpy(imgp->interpreter_name, rpath, len);
703			    }
704			    free(rpath, M_TEMP);
705		    }
706	    }
707    }
708    return(error);
709}
710
711struct sysentvec linux_sysvec = {
712	LINUX_SYS_MAXSYSCALL,
713	linux_sysent,
714	0xff,
715	LINUX_SIGTBLSZ,
716	bsd_to_linux_signal,
717	ELAST + 1,
718	bsd_to_linux_errno,
719	translate_traps,
720	linux_fixup,
721	linux_sendsig,
722	linux_sigcode,
723	&linux_szsigcode,
724	linux_prepsyscall,
725	"Linux a.out",
726	aout_coredump,
727	exec_linux_imgact_try,
728	LINUX_MINSIGSTKSZ
729};
730
731struct sysentvec elf_linux_sysvec = {
732	LINUX_SYS_MAXSYSCALL,
733	linux_sysent,
734	0xff,
735	LINUX_SIGTBLSZ,
736	bsd_to_linux_signal,
737	ELAST + 1,
738	bsd_to_linux_errno,
739	translate_traps,
740	elf_linux_fixup,
741	linux_sendsig,
742	linux_sigcode,
743	&linux_szsigcode,
744	linux_prepsyscall,
745	"Linux ELF",
746	elf_coredump,
747	exec_linux_imgact_try,
748	LINUX_MINSIGSTKSZ
749};
750
751static Elf32_Brandinfo linux_brand = {
752					ELFOSABI_LINUX,
753					"/compat/linux",
754					"/lib/ld-linux.so.1",
755					&elf_linux_sysvec
756				 };
757
758static Elf32_Brandinfo linux_glibc2brand = {
759					ELFOSABI_LINUX,
760					"/compat/linux",
761					"/lib/ld-linux.so.2",
762					&elf_linux_sysvec
763				 };
764
765Elf32_Brandinfo *linux_brandlist[] = {
766					&linux_brand,
767					&linux_glibc2brand,
768					NULL
769				};
770
771static int
772linux_elf_modevent(module_t mod, int type, void *data)
773{
774	Elf32_Brandinfo **brandinfo;
775	int error;
776
777	error = 0;
778
779	switch(type) {
780	case MOD_LOAD:
781		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
782		     ++brandinfo)
783			if (elf_insert_brand_entry(*brandinfo) < 0)
784				error = EINVAL;
785		if (error)
786			printf("cannot insert Linux elf brand handler\n");
787		else {
788			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
789			if (bootverbose)
790				printf("Linux-ELF exec handler installed\n");
791		}
792		break;
793	case MOD_UNLOAD:
794		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
795		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
796		     ++brandinfo)
797			if (elf_brand_inuse(*brandinfo))
798				error = EBUSY;
799
800		if (error == 0) {
801			for (brandinfo = &linux_brandlist[0];
802			     *brandinfo != NULL; ++brandinfo)
803				if (elf_remove_brand_entry(*brandinfo) < 0)
804					error = EINVAL;
805		}
806		if (error)
807			printf("Could not deinstall ELF interpreter entry\n");
808		else if (bootverbose)
809			printf("Linux-elf exec handler removed\n");
810		break;
811	default:
812		break;
813	}
814	return error;
815}
816static moduledata_t linux_elf_mod = {
817	"linuxelf",
818	linux_elf_modevent,
819	0
820};
821DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
822