linux_sysvec.c revision 76497
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software withough specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 76497 2001-05-12 03:23:10Z deischen $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/imgact.h>
41#include <sys/imgact_aout.h>
42#include <sys/imgact_elf.h>
43#include <sys/malloc.h>
44#include <sys/proc.h>
45#include <sys/signalvar.h>
46#include <sys/sysent.h>
47#include <sys/sysproto.h>
48
49#include <vm/vm.h>
50#include <vm/vm_param.h>
51#include <vm/vm_page.h>
52#include <vm/vm_extern.h>
53#include <sys/exec.h>
54#include <sys/kernel.h>
55#include <sys/module.h>
56#include <machine/cpu.h>
57#include <sys/lock.h>
58#include <sys/mutex.h>
59
60#include <i386/linux/linux.h>
61#include <i386/linux/linux_proto.h>
62#include <compat/linux/linux_signal.h>
63#include <compat/linux/linux_util.h>
64
65MODULE_VERSION(linux, 1);
66MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
67MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
68MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
69
70MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
71
72#if BYTE_ORDER == LITTLE_ENDIAN
73#define SHELLMAGIC      0x2123 /* #! */
74#else
75#define SHELLMAGIC      0x2321
76#endif
77
78extern char linux_sigcode[];
79extern int linux_szsigcode;
80
81extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
82
83extern struct linker_set linux_ioctl_handler_set;
84
85static int	linux_fixup __P((register_t **stack_base,
86				 struct image_params *iparams));
87static int	elf_linux_fixup __P((register_t **stack_base,
88				     struct image_params *iparams));
89static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
90				       u_int *code, caddr_t *params));
91static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
92				   u_long code));
93
/*
 * Errno translation table, indexed by the native errno value.
 *
 * Linux system calls report errors as negative errno values while the
 * native code uses positive ones, so each slot holds the already-negated
 * Linux number for the native errno equal to its index.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	/*  0 -  9 */
	  -0,   -1,   -2,   -3,   -4,   -5,   -6,   -7,   -8,   -9,
	/* 10 - 19 */
	 -10,  -35,  -12,  -13,  -14,  -15,  -16,  -17,  -18,  -19,
	/* 20 - 29 */
	 -20,  -21,  -22,  -23,  -24,  -25,  -26,  -27,  -28,  -29,
	/* 30 - 39 */
	 -30,  -31,  -32,  -33,  -34,  -11, -115, -114,  -88,  -89,
	/* 40 - 49 */
	 -90,  -91,  -92,  -93,  -94,  -95,  -96,  -97,  -98,  -99,
	/* 50 - 59 */
	-100, -101, -102, -103, -104, -105, -106, -107, -108, -109,
	/* 60 - 69 */
	-110, -111,  -40,  -36, -112, -113,  -39,  -11,  -87, -122,
	/* 70 - 79 */
	-116,  -66,   -6,   -6,   -6,   -6,   -6,  -37,  -38,   -9,
	/* 80 - 86 */
	  -6,   -6,  -43,  -42,  -75,   -6,  -84
};
108
109int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
110	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
111	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
112	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
113	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
114	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
115	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
116	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
117	0, LINUX_SIGUSR1, LINUX_SIGUSR2
118};
119
/*
 * Linux -> native signal numbers; the inverse direction of
 * bsd_to_linux_signal.  Slot comments give the Linux signal number on the
 * assumption that the table is indexed with (signal - 1), like its
 * counterpart.  The final slot is 0, presumably "no native equivalent".
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP,			/*  1 */
	SIGINT,			/*  2 */
	SIGQUIT,		/*  3 */
	SIGILL,			/*  4 */
	SIGTRAP,		/*  5 */
	SIGABRT,		/*  6 */
	SIGBUS,			/*  7 */
	SIGFPE,			/*  8 */
	SIGKILL,		/*  9 */
	SIGUSR1,		/* 10 */
	SIGSEGV,		/* 11 */
	SIGUSR2,		/* 12 */
	SIGPIPE,		/* 13 */
	SIGALRM,		/* 14 */
	SIGTERM,		/* 15 */
	SIGBUS,			/* 16 */
	SIGCHLD,		/* 17 */
	SIGCONT,		/* 18 */
	SIGSTOP,		/* 19 */
	SIGTSTP,		/* 20 */
	SIGTTIN,		/* 21 */
	SIGTTOU,		/* 22 */
	SIGURG,			/* 23 */
	SIGXCPU,		/* 24 */
	SIGXFSZ,		/* 25 */
	SIGVTALRM,		/* 26 */
	SIGPROF,		/* 27 */
	SIGWINCH,		/* 28 */
	SIGIO,			/* 29 */
	SIGURG,			/* 30 */
	0			/* 31 */
};
130
/*
 * Reconcile the signal chosen for a trap with what a Linux binary
 * expects.  A SIGBUS raised for a protection, TSS, double or page fault
 * is reported as SIGSEGV instead; every other signal/trap combination
 * is returned unchanged.
 */
static int
translate_traps(int signal, int trap_code)
{
	int remapped = signal;

	if (signal == SIGBUS &&
	    (trap_code == T_PROTFLT || trap_code == T_TSSFLT ||
	     trap_code == T_DOUBLEFLT || trap_code == T_PAGEFLT))
		remapped = SIGSEGV;

	return (remapped);
}
150
151static int
152linux_fixup(register_t **stack_base, struct image_params *imgp)
153{
154	register_t *argv, *envp;
155
156	argv = *stack_base;
157	envp = *stack_base + (imgp->argc + 1);
158	(*stack_base)--;
159	**stack_base = (intptr_t)(void *)envp;
160	(*stack_base)--;
161	**stack_base = (intptr_t)(void *)argv;
162	(*stack_base)--;
163	**stack_base = imgp->argc;
164	return 0;
165}
166
167static int
168elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
169{
170	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
171	register_t *pos;
172
173	pos = *stack_base + (imgp->argc + imgp->envc + 2);
174
175	if (args->trace) {
176		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
177	}
178	if (args->execfd != -1) {
179		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
180	}
181	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
182	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
183	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
184	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
185	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
186	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
187	AUXARGS_ENTRY(pos, AT_BASE, args->base);
188	PROC_LOCK(imgp->proc);
189	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
190	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
191	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
192	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
193	PROC_UNLOCK(imgp->proc);
194	AUXARGS_ENTRY(pos, AT_NULL, 0);
195
196	free(imgp->auxargs, M_TEMP);
197	imgp->auxargs = NULL;
198
199	(*stack_base)--;
200	**stack_base = (long)imgp->argc;
201	return 0;
202}
203
204extern int _ucodesel, _udatasel;
205extern unsigned long linux_sznonrtsigcode;
206
207static void
208linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
209{
210	register struct proc *p = curproc;
211	register struct trapframe *regs;
212	struct linux_rt_sigframe *fp, frame;
213	int oonstack;
214
215	regs = p->p_md.md_regs;
216	oonstack = sigonstack(regs->tf_esp);
217
218#ifdef DEBUG
219	if (ldebug(sigreturn))
220		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
221		    catcher, sig, (void*)mask, code);
222#endif
223	/*
224	 * Allocate space for the signal handler context.
225	 */
226	PROC_LOCK(p);
227	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
228	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
229		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
230		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
231	} else
232		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
233	PROC_UNLOCK(p);
234
235	/*
236	 * grow() will return FALSE if the fp will not fit inside the stack
237	 *	and the stack can not be grown. useracc will return FALSE
238	 *	if access is denied.
239	 */
240	if ((grow_stack (p, (int)fp) == FALSE) ||
241	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
242	    VM_PROT_WRITE)) {
243		/*
244		 * Process has trashed its stack; give it an illegal
245		 * instruction to halt it in its tracks.
246		 */
247		PROC_LOCK(p);
248		SIGACTION(p, SIGILL) = SIG_DFL;
249		SIGDELSET(p->p_sigignore, SIGILL);
250		SIGDELSET(p->p_sigcatch, SIGILL);
251		SIGDELSET(p->p_sigmask, SIGILL);
252#ifdef DEBUG
253		if (ldebug(sigreturn))
254			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
255			    fp, oonstack);
256#endif
257		psignal(p, SIGILL);
258		PROC_UNLOCK(p);
259		return;
260	}
261
262	/*
263	 * Build the argument list for the signal handler.
264	 */
265	if (p->p_sysent->sv_sigtbl)
266		if (sig <= p->p_sysent->sv_sigsize)
267			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
268
269	frame.sf_handler = catcher;
270	frame.sf_sig = sig;
271	frame.sf_siginfo = &fp->sf_si;
272	frame.sf_ucontext = &fp->sf_sc;
273
274	/* Fill siginfo structure. */
275	frame.sf_si.lsi_signo = sig;
276	frame.sf_si.lsi_code = code;
277	frame.sf_si.lsi_addr = (void *)regs->tf_err;
278
279	/*
280	 * Build the signal context to be used by sigreturn.
281	 */
282	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
283	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
284
285	PROC_LOCK(p);
286	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
287	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
288	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
289	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
290	PROC_UNLOCK(p);
291
292	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
293
294	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
295	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
296	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
297	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
298	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
299	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
300	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
301	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
302	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
303	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
304	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
305	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
306	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
307	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
308	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
309	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
310	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
311	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
312	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
313
314#ifdef DEBUG
315	if (ldebug(sigreturn))
316		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
317		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
318		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
319#endif
320
321	if (copyout(&frame, fp, sizeof(frame)) != 0) {
322		/*
323		 * Process has trashed its stack; give it an illegal
324		 * instruction to halt it in its tracks.
325		 */
326		PROC_LOCK(p);
327		sigexit(p, SIGILL);
328		/* NOTREACHED */
329	}
330
331	/*
332	 * Build context to run handler in.
333	 */
334	regs->tf_esp = (int)fp;
335	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
336	    linux_sznonrtsigcode;
337	regs->tf_eflags &= ~PSL_VM;
338	regs->tf_cs = _ucodesel;
339	regs->tf_ds = _udatasel;
340	regs->tf_es = _udatasel;
341	regs->tf_fs = _udatasel;
342	regs->tf_ss = _udatasel;
343}
344
345
346/*
347 * Send an interrupt to process.
348 *
349 * Stack is set up to allow sigcode stored
350 * in u. to call routine, followed by kcall
351 * to sigreturn routine below.  After sigreturn
352 * resets the signal mask, the stack, and the
353 * frame pointer, it returns to the user
354 * specified pc, psl.
355 */
356
357static void
358linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
359{
360	register struct proc *p = curproc;
361	register struct trapframe *regs;
362	struct linux_sigframe *fp, frame;
363	linux_sigset_t lmask;
364	int oonstack, i;
365
366	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
367		/* Signal handler installed with SA_SIGINFO. */
368		linux_rt_sendsig(catcher, sig, mask, code);
369		return;
370	}
371
372	regs = p->p_md.md_regs;
373	oonstack = sigonstack(regs->tf_esp);
374
375#ifdef DEBUG
376	if (ldebug(sigreturn))
377		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
378		    catcher, sig, (void*)mask, code);
379#endif
380
381	/*
382	 * Allocate space for the signal handler context.
383	 */
384	PROC_LOCK(p);
385	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
386	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
387		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
388		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
389	} else
390		fp = (struct linux_sigframe *)regs->tf_esp - 1;
391	PROC_UNLOCK(p);
392
393	/*
394	 * grow() will return FALSE if the fp will not fit inside the stack
395	 *	and the stack can not be grown. useracc will return FALSE
396	 *	if access is denied.
397	 */
398	if ((grow_stack (p, (int)fp) == FALSE) ||
399	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
400	    VM_PROT_WRITE)) {
401		/*
402		 * Process has trashed its stack; give it an illegal
403		 * instruction to halt it in its tracks.
404		 */
405		PROC_LOCK(p);
406		SIGACTION(p, SIGILL) = SIG_DFL;
407		SIGDELSET(p->p_sigignore, SIGILL);
408		SIGDELSET(p->p_sigcatch, SIGILL);
409		SIGDELSET(p->p_sigmask, SIGILL);
410		psignal(p, SIGILL);
411		PROC_UNLOCK(p);
412		return;
413	}
414
415	/*
416	 * Build the argument list for the signal handler.
417	 */
418	if (p->p_sysent->sv_sigtbl)
419		if (sig <= p->p_sysent->sv_sigsize)
420			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
421
422	frame.sf_handler = catcher;
423	frame.sf_sig = sig;
424
425	bsd_to_linux_sigset(mask, &lmask);
426
427	/*
428	 * Build the signal context to be used by sigreturn.
429	 */
430	frame.sf_sc.sc_mask   = lmask.__bits[0];
431	frame.sf_sc.sc_gs     = rgs();
432	frame.sf_sc.sc_fs     = regs->tf_fs;
433	frame.sf_sc.sc_es     = regs->tf_es;
434	frame.sf_sc.sc_ds     = regs->tf_ds;
435	frame.sf_sc.sc_edi    = regs->tf_edi;
436	frame.sf_sc.sc_esi    = regs->tf_esi;
437	frame.sf_sc.sc_ebp    = regs->tf_ebp;
438	frame.sf_sc.sc_ebx    = regs->tf_ebx;
439	frame.sf_sc.sc_edx    = regs->tf_edx;
440	frame.sf_sc.sc_ecx    = regs->tf_ecx;
441	frame.sf_sc.sc_eax    = regs->tf_eax;
442	frame.sf_sc.sc_eip    = regs->tf_eip;
443	frame.sf_sc.sc_cs     = regs->tf_cs;
444	frame.sf_sc.sc_eflags = regs->tf_eflags;
445	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
446	frame.sf_sc.sc_ss     = regs->tf_ss;
447	frame.sf_sc.sc_err    = regs->tf_err;
448	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
449
450	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
451
452	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
453		frame.sf_extramask[i] = lmask.__bits[i+1];
454
455	if (copyout(&frame, fp, sizeof(frame)) != 0) {
456		/*
457		 * Process has trashed its stack; give it an illegal
458		 * instruction to halt it in its tracks.
459		 */
460		PROC_LOCK(p);
461		sigexit(p, SIGILL);
462		/* NOTREACHED */
463	}
464
465	/*
466	 * Build context to run handler in.
467	 */
468	regs->tf_esp = (int)fp;
469	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
470	regs->tf_eflags &= ~PSL_VM;
471	regs->tf_cs = _ucodesel;
472	regs->tf_ds = _udatasel;
473	regs->tf_es = _udatasel;
474	regs->tf_fs = _udatasel;
475	regs->tf_ss = _udatasel;
476}
477
478/*
479 * System call to cleanup state after a signal
480 * has been taken.  Reset signal mask and
481 * stack state from context left by sendsig (above).
482 * Return to previous pc and psl as specified by
483 * context left by sendsig. Check carefully to
484 * make sure that the user has not modified the
485 * psl to gain improper privileges or to cause
486 * a machine fault.
487 */
488int
489linux_sigreturn(p, args)
490	struct proc *p;
491	struct linux_sigreturn_args *args;
492{
493	struct linux_sigframe frame;
494	register struct trapframe *regs;
495	linux_sigset_t lmask;
496	int eflags, i;
497
498	regs = p->p_md.md_regs;
499
500#ifdef DEBUG
501	if (ldebug(sigreturn))
502		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
503#endif
504	/*
505	 * The trampoline code hands us the sigframe.
506	 * It is unsafe to keep track of it ourselves, in the event that a
507	 * program jumps out of a signal handler.
508	 */
509	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
510		return (EFAULT);
511
512	/*
513	 * Check for security violations.
514	 */
515#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
516	eflags = frame.sf_sc.sc_eflags;
517	/*
518	 * XXX do allow users to change the privileged flag PSL_RF.  The
519	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
520	 * sometimes set it there too.  tf_eflags is kept in the signal
521	 * context during signal handling and there is no other place
522	 * to remember it, so the PSL_RF bit may be corrupted by the
523	 * signal handler without us knowing.  Corruption of the PSL_RF
524	 * bit at worst causes one more or one less debugger trap, so
525	 * allowing it is fairly harmless.
526	 */
527	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
528    		return(EINVAL);
529	}
530
531	/*
532	 * Don't allow users to load a valid privileged %cs.  Let the
533	 * hardware check for invalid selectors, excess privilege in
534	 * other selectors, invalid %eip's and invalid %esp's.
535	 */
536#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
537	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
538		trapsignal(p, SIGBUS, T_PROTFLT);
539		return(EINVAL);
540	}
541
542	lmask.__bits[0] = frame.sf_sc.sc_mask;
543	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
544		lmask.__bits[i+1] = frame.sf_extramask[i];
545	PROC_LOCK(p);
546	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
547	SIG_CANTMASK(p->p_sigmask);
548	PROC_UNLOCK(p);
549
550	/*
551	 * Restore signal context.
552	 */
553	/* %gs was restored by the trampoline. */
554	regs->tf_fs     = frame.sf_sc.sc_fs;
555	regs->tf_es     = frame.sf_sc.sc_es;
556	regs->tf_ds     = frame.sf_sc.sc_ds;
557	regs->tf_edi    = frame.sf_sc.sc_edi;
558	regs->tf_esi    = frame.sf_sc.sc_esi;
559	regs->tf_ebp    = frame.sf_sc.sc_ebp;
560	regs->tf_ebx    = frame.sf_sc.sc_ebx;
561	regs->tf_edx    = frame.sf_sc.sc_edx;
562	regs->tf_ecx    = frame.sf_sc.sc_ecx;
563	regs->tf_eax    = frame.sf_sc.sc_eax;
564	regs->tf_eip    = frame.sf_sc.sc_eip;
565	regs->tf_cs     = frame.sf_sc.sc_cs;
566	regs->tf_eflags = eflags;
567	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
568	regs->tf_ss     = frame.sf_sc.sc_ss;
569
570	return (EJUSTRETURN);
571}
572
573/*
574 * System call to cleanup state after a signal
575 * has been taken.  Reset signal mask and
576 * stack state from context left by rt_sendsig (above).
577 * Return to previous pc and psl as specified by
578 * context left by sendsig. Check carefully to
579 * make sure that the user has not modified the
580 * psl to gain improper privileges or to cause
581 * a machine fault.
582 */
583int
584linux_rt_sigreturn(p, args)
585	struct proc *p;
586	struct linux_rt_sigreturn_args *args;
587{
588	struct sigaltstack_args sasargs;
589	struct linux_ucontext 	 uc;
590	struct linux_sigcontext *context;
591	linux_stack_t *lss;
592	stack_t *ss;
593	register struct trapframe *regs;
594	int eflags;
595	caddr_t sg = stackgap_init();
596
597	regs = p->p_md.md_regs;
598
599#ifdef DEBUG
600	if (ldebug(rt_sigreturn))
601		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
602#endif
603	/*
604	 * The trampoline code hands us the ucontext.
605	 * It is unsafe to keep track of it ourselves, in the event that a
606	 * program jumps out of a signal handler.
607	 */
608	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
609		return (EFAULT);
610
611	context = &uc.uc_mcontext;
612
613	/*
614	 * Check for security violations.
615	 */
616#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
617	eflags = context->sc_eflags;
618	/*
619	 * XXX do allow users to change the privileged flag PSL_RF.  The
620	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
621	 * sometimes set it there too.  tf_eflags is kept in the signal
622	 * context during signal handling and there is no other place
623	 * to remember it, so the PSL_RF bit may be corrupted by the
624	 * signal handler without us knowing.  Corruption of the PSL_RF
625	 * bit at worst causes one more or one less debugger trap, so
626	 * allowing it is fairly harmless.
627	 */
628	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
629    		return(EINVAL);
630	}
631
632	/*
633	 * Don't allow users to load a valid privileged %cs.  Let the
634	 * hardware check for invalid selectors, excess privilege in
635	 * other selectors, invalid %eip's and invalid %esp's.
636	 */
637#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
638	if (!CS_SECURE(context->sc_cs)) {
639		trapsignal(p, SIGBUS, T_PROTFLT);
640		return(EINVAL);
641	}
642
643	PROC_LOCK(p);
644	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
645	SIG_CANTMASK(p->p_sigmask);
646	PROC_UNLOCK(p);
647
648	/*
649	 * Restore signal context
650	 */
651	/* %gs was restored by the trampoline. */
652	regs->tf_fs     = context->sc_fs;
653	regs->tf_es     = context->sc_es;
654	regs->tf_ds     = context->sc_ds;
655	regs->tf_edi    = context->sc_edi;
656	regs->tf_esi    = context->sc_esi;
657	regs->tf_ebp    = context->sc_ebp;
658	regs->tf_ebx    = context->sc_ebx;
659	regs->tf_edx    = context->sc_edx;
660	regs->tf_ecx    = context->sc_ecx;
661	regs->tf_eax    = context->sc_eax;
662	regs->tf_eip    = context->sc_eip;
663	regs->tf_cs     = context->sc_cs;
664	regs->tf_eflags = eflags;
665	regs->tf_esp    = context->sc_esp_at_signal;
666	regs->tf_ss     = context->sc_ss;
667
668	/*
669	 * call sigaltstack & ignore results..
670	 */
671	ss = stackgap_alloc(&sg, sizeof(stack_t));
672	lss = &uc.uc_stack;
673	ss->ss_sp = lss->ss_sp;
674	ss->ss_size = lss->ss_size;
675	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
676
677#ifdef DEBUG
678	if (ldebug(rt_sigreturn))
679		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
680		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
681#endif
682	sasargs.ss = ss;
683	sasargs.oss = NULL;
684	(void) sigaltstack(p, &sasargs);
685
686	return (EJUSTRETURN);
687}
688
689static void
690linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
691{
692	args[0] = tf->tf_ebx;
693	args[1] = tf->tf_ecx;
694	args[2] = tf->tf_edx;
695	args[3] = tf->tf_esi;
696	args[4] = tf->tf_edi;
697	*params = NULL;		/* no copyin */
698}
699
700/*
701 * If a linux binary is exec'ing something, try this image activator
702 * first.  We override standard shell script execution in order to
703 * be able to modify the interpreter path.  We only do this if a linux
704 * binary is doing the exec, so we do not create an EXEC module for it.
705 */
706static int	exec_linux_imgact_try __P((struct image_params *iparams));
707
708static int
709exec_linux_imgact_try(imgp)
710    struct image_params *imgp;
711{
712    const char *head = (const char *)imgp->image_header;
713    int error = -1;
714
715    /*
716     * The interpreter for shell scripts run from a linux binary needs
717     * to be located in /compat/linux if possible in order to recursively
718     * maintain linux path emulation.
719     */
720    if (((const short *)head)[0] == SHELLMAGIC) {
721	    /*
722	     * Run our normal shell image activator.  If it succeeds attempt
723	     * to use the alternate path for the interpreter.  If an alternate
724	     * path is found, use our stringspace to store it.
725	     */
726	    if ((error = exec_shell_imgact(imgp)) == 0) {
727		    char *rpath = NULL;
728
729		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
730			imgp->interpreter_name, &rpath, 0);
731		    if (rpath != imgp->interpreter_name) {
732			    int len = strlen(rpath) + 1;
733
734			    if (len <= MAXSHELLCMDLEN) {
735				memcpy(imgp->interpreter_name, rpath, len);
736			    }
737			    free(rpath, M_TEMP);
738		    }
739	    }
740    }
741    return(error);
742}
743
744struct sysentvec linux_sysvec = {
745	LINUX_SYS_MAXSYSCALL,
746	linux_sysent,
747	0xff,
748	LINUX_SIGTBLSZ,
749	bsd_to_linux_signal,
750	ELAST + 1,
751	bsd_to_linux_errno,
752	translate_traps,
753	linux_fixup,
754	linux_sendsig,
755	linux_sigcode,
756	&linux_szsigcode,
757	linux_prepsyscall,
758	"Linux a.out",
759	aout_coredump,
760	exec_linux_imgact_try,
761	LINUX_MINSIGSTKSZ
762};
763
764struct sysentvec elf_linux_sysvec = {
765	LINUX_SYS_MAXSYSCALL,
766	linux_sysent,
767	0xff,
768	LINUX_SIGTBLSZ,
769	bsd_to_linux_signal,
770	ELAST + 1,
771	bsd_to_linux_errno,
772	translate_traps,
773	elf_linux_fixup,
774	linux_sendsig,
775	linux_sigcode,
776	&linux_szsigcode,
777	linux_prepsyscall,
778	"Linux ELF",
779	elf_coredump,
780	exec_linux_imgact_try,
781	LINUX_MINSIGSTKSZ
782};
783
784static Elf32_Brandinfo linux_brand = {
785					ELFOSABI_LINUX,
786					"Linux",
787					"/compat/linux",
788					"/lib/ld-linux.so.1",
789					&elf_linux_sysvec
790				 };
791
792static Elf32_Brandinfo linux_glibc2brand = {
793					ELFOSABI_LINUX,
794					"Linux",
795					"/compat/linux",
796					"/lib/ld-linux.so.2",
797					&elf_linux_sysvec
798				 };
799
800Elf32_Brandinfo *linux_brandlist[] = {
801					&linux_brand,
802					&linux_glibc2brand,
803					NULL
804				};
805
806static int
807linux_elf_modevent(module_t mod, int type, void *data)
808{
809	Elf32_Brandinfo **brandinfo;
810	int error;
811
812	error = 0;
813
814	switch(type) {
815	case MOD_LOAD:
816		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
817		     ++brandinfo)
818			if (elf_insert_brand_entry(*brandinfo) < 0)
819				error = EINVAL;
820		if (error == 0) {
821			linux_ioctl_register_handlers(
822				&linux_ioctl_handler_set);
823			if (bootverbose)
824				printf("Linux ELF exec handler installed\n");
825		} else
826			printf("cannot insert Linux ELF brand handler\n");
827		break;
828	case MOD_UNLOAD:
829		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
830		     ++brandinfo)
831			if (elf_brand_inuse(*brandinfo))
832				error = EBUSY;
833		if (error == 0) {
834			for (brandinfo = &linux_brandlist[0];
835			     *brandinfo != NULL; ++brandinfo)
836				if (elf_remove_brand_entry(*brandinfo) < 0)
837					error = EINVAL;
838		}
839		if (error == 0) {
840			linux_ioctl_unregister_handlers(
841				&linux_ioctl_handler_set);
842			if (bootverbose)
843				printf("Linux ELF exec handler removed\n");
844		} else
845			printf("Could not deinstall ELF interpreter entry\n");
846		break;
847	default:
848		break;
849	}
850	return error;
851}
852
853static moduledata_t linux_elf_mod = {
854	"linuxelf",
855	linux_elf_modevent,
856	0
857};
858
859DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
860