linux_sysvec.c revision 111797
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 111797 2003-03-03 09:14:26Z des $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/imgact.h>
41#include <sys/imgact_aout.h>
42#include <sys/imgact_elf.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/signalvar.h>
48#include <sys/syscallsubr.h>
49#include <sys/sysent.h>
50#include <sys/sysproto.h>
51#include <sys/user.h>
52#include <sys/vnode.h>
53
54#include <vm/vm.h>
55#include <vm/vm_param.h>
56#include <vm/vm_page.h>
57#include <vm/vm_extern.h>
58#include <sys/exec.h>
59#include <sys/kernel.h>
60#include <sys/module.h>
61#include <machine/cpu.h>
62#include <machine/md_var.h>
63#include <sys/mutex.h>
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/pmap.h>
68#include <vm/vm_map.h>
69#include <vm/vm_object.h>
70
71#include <i386/linux/linux.h>
72#include <i386/linux/linux_proto.h>
73#include <compat/linux/linux_signal.h>
74#include <compat/linux/linux_util.h>
75
/*
 * Module identity: the "linux" module is version 1 and declares
 * dependencies on the sysvmsg, sysvsem and sysvshm modules.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Malloc type M_LINUX ("linux"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * Value of the first two bytes of a "#!" script when they are read as a
 * single short in host byte order (see exec_linux_imgact_try()).
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* Signal trampoline code and its size; both are installed in the sysentvecs. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker set of ioctl handlers registered/unregistered by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations for the sysentvec callbacks defined in this file. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
		    u_long code);
113
114/*
115 * Linux syscalls return negative errno's, we do positive and map them
116 */
117static int bsd_to_linux_errno[ELAST + 1] = {
118  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
119 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
120 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
121 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
122 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
123	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
124	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
125	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
126  	-6, -6, -43, -42, -75, -6, -84
127};
128
129int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
130	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
131	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
132	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
133	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
134	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
135	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
136	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
137	0, LINUX_SIGUSR1, LINUX_SIGUSR2
138};
139
140int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
141	SIGHUP, SIGINT, SIGQUIT, SIGILL,
142	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
143	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
144	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
145	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
146	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
147	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
148	SIGIO, SIGURG, SIGSYS
149};
150
/*
 * Trap number translation.  The table is indexed by the FreeBSD trap
 * code and yields the trap number reported to Linux in sc_trapno.
 * Codes without an entry, and codes past the end of the table, are
 * reported as LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	/*  0               */	LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT   */	6,
	/*  2               */	LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT      */	3,
	/*  4               */	LINUX_T_UNKNOWN,
	/*  5               */	LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP   */	16,
	/*  7 T_ASTFLT      */	254,
	/*  8               */	LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT     */	13,
	/* 10 T_TRCTRAP     */	1,
	/* 11               */	LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT     */	14,
	/* 13               */	LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT    */	17,
	/* 15               */	LINUX_T_UNKNOWN,
	/* 16               */	LINUX_T_UNKNOWN,
	/* 17               */	LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE      */	0,
	/* 19 T_NMI         */	2,
	/* 20 T_OFLOW       */	4,
	/* 21 T_BOUND       */	5,
	/* 22 T_DNA         */	7,
	/* 23 T_DOUBLEFLT   */	8,
	/* 24 T_FPOPFLT     */	9,
	/* 25 T_TSSFLT      */	10,
	/* 26 T_SEGNPFLT    */	11,
	/* 27 T_STKFLT      */	12,
	/* 28 T_MCHK        */	18,
	/* 29 T_XMMFLT      */	19,
	/* 30 T_RESERVED    */	15
};

/* Bounds-checked lookup; anything outside the table is LINUX_T_UNKNOWN. */
#define bsd_to_linux_trapcode(code) \
    ((code) >= sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0]) ? \
     LINUX_T_UNKNOWN : \
     _bsd_to_linux_trapcode[(code)])
189
190/*
191 * If FreeBSD & Linux have a difference of opinion about what a trap
192 * means, deal with it here.
193 *
194 * MPSAFE
195 */
196static int
197translate_traps(int signal, int trap_code)
198{
199	if (signal != SIGBUS)
200		return signal;
201	switch (trap_code) {
202	case T_PROTFLT:
203	case T_TSSFLT:
204	case T_DOUBLEFLT:
205	case T_PAGEFLT:
206		return SIGSEGV;
207	default:
208		return signal;
209	}
210}
211
212static int
213linux_fixup(register_t **stack_base, struct image_params *imgp)
214{
215	register_t *argv, *envp;
216
217	argv = *stack_base;
218	envp = *stack_base + (imgp->argc + 1);
219	(*stack_base)--;
220	**stack_base = (intptr_t)(void *)envp;
221	(*stack_base)--;
222	**stack_base = (intptr_t)(void *)argv;
223	(*stack_base)--;
224	**stack_base = imgp->argc;
225	return 0;
226}
227
228static int
229elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
230{
231	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
232	register_t *pos;
233
234	pos = *stack_base + (imgp->argc + imgp->envc + 2);
235
236	if (args->trace)
237		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
238	if (args->execfd != -1)
239		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
240	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
241	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
242	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
243	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
244	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
245	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
246	AUXARGS_ENTRY(pos, AT_BASE, args->base);
247	PROC_LOCK(imgp->proc);
248	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
249	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
250	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
251	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
252	PROC_UNLOCK(imgp->proc);
253	AUXARGS_ENTRY(pos, AT_NULL, 0);
254
255	free(imgp->auxargs, M_TEMP);
256	imgp->auxargs = NULL;
257
258	(*stack_base)--;
259	**stack_base = (long)imgp->argc;
260	return 0;
261}
262
/* Selectors stored into tf_cs (_ucodesel) and tf_ds/es/fs/ss (_udatasel). */
extern int _ucodesel, _udatasel;
/* Added to the trampoline address when linux_rt_sendsig() computes tf_eip. */
extern unsigned long linux_sznonrtsigcode;

/*
 * Deliver a signal to the current thread using the Linux rt signal
 * frame (struct l_rt_sigframe).  linux_sendsig() calls this for signals
 * that are members of ps_siginfo (handler installed with SA_SIGINFO).
 *
 *	catcher	handler address, stored in the frame
 *	sig	signal number; translated through sv_sigtbl when the
 *		process' sysentvec supplies one
 *	mask	signal mask saved in the frame (converted with
 *		bsd_to_linux_sigset())
 *	code	trap code; stored as lsi_code and mapped to sc_trapno
 *
 * Locking: the proc lock must be held on entry (asserted below).  It
 * is dropped while the frame is built and copied out, and is held
 * again when the function returns.
 */
static void
linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	register struct thread *td = curthread;
	register struct proc *p = td->td_proc;
	register struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	regs = td->td_frame;
	/* sigonstack() result for the stack pointer at the time of the signal. */
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context.
	 *
	 * The frame goes at the top of the alternate stack when one is
	 * configured (P_ALTSTACK), we are not already on it, and the
	 * signal is in ps_sigonstack; otherwise it goes one frame below
	 * the current user stack pointer.
	 */
	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from an all-zero frame so no stale kernel stack is copied out. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;
	/* These point into the user-space copy of the frame (fp), not the local. */
	frame.sf_siginfo = &fp->sf_si;
	frame.sf_ucontext = &fp->sf_sc;

	/* Fill in POSIX parts */
	frame.sf_si.lsi_signo = sig;
	frame.sf_si.lsi_code = code;
	frame.sf_si.lsi_addr = (void *)regs->tf_err;

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = NULL;		/* XXX ??? */

	/* The alternate-stack description is read with the proc lock held. */
	PROC_LOCK(p);
	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Save the interrupted register state; %gs is read directly via rgs(). */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		/* sigexit() is called with the proc lock held; it is expected not to return. */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 *
	 * The handler is entered through the signal trampoline, located
	 * sv_szsigcode bytes below PS_STRINGS; the rt variant adds
	 * linux_sznonrtsigcode to that address (presumably to skip the
	 * non-rt part of the trampoline -- confirm against the sigcode).
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Return with the proc lock held, as on entry. */
	PROC_LOCK(p);
}
384
385
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * This is the sv_sendsig entry of both Linux sysentvecs.  Signals that
 * are members of ps_siginfo are handed to linux_rt_sendsig(); all
 * others get the classic struct l_sigframe built here.
 *
 * Locking: the proc lock must be held on entry (asserted below).  It
 * is dropped while the frame is built and copied out, and is held
 * again when the function returns.
 */
static void
linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	register struct thread *td = curthread;
	register struct proc *p = td->td_proc;
	register struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, sig, mask, code);
		return;
	}

	regs = td->td_frame;
	/* sigonstack() result for the stack pointer at the time of the signal. */
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context.
	 *
	 * Top of the alternate stack when one is configured, we are not
	 * already on it, and the signal is in ps_sigonstack; otherwise
	 * one frame below the current user stack pointer.
	 */
	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_esp - 1;
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from an all-zero frame so no stale kernel stack is copied out. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = rgs();
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_edi;
	frame.sf_sc.sc_esi    = regs->tf_esi;
	frame.sf_sc.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.sc_edx    = regs->tf_edx;
	frame.sf_sc.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.sc_eax    = regs->tf_eax;
	frame.sf_sc.sc_eip    = regs->tf_eip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_eflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words beyond the first travel in sf_extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		/* sigexit() is called with the proc lock held; it is expected not to return. */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 *
	 * The handler is entered through the signal trampoline, located
	 * sv_szsigcode bytes below PS_STRINGS.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Return with the proc lock held, as on entry. */
	PROC_LOCK(p);
}
495
496/*
497 * System call to cleanup state after a signal
498 * has been taken.  Reset signal mask and
499 * stack state from context left by sendsig (above).
500 * Return to previous pc and psl as specified by
501 * context left by sendsig. Check carefully to
502 * make sure that the user has not modified the
503 * psl to gain improper privileges or to cause
504 * a machine fault.
505 */
506int
507linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
508{
509	struct proc *p = td->td_proc;
510	struct l_sigframe frame;
511	register struct trapframe *regs;
512	l_sigset_t lmask;
513	int eflags, i;
514
515	regs = td->td_frame;
516
517#ifdef DEBUG
518	if (ldebug(sigreturn))
519		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
520#endif
521	/*
522	 * The trampoline code hands us the sigframe.
523	 * It is unsafe to keep track of it ourselves, in the event that a
524	 * program jumps out of a signal handler.
525	 */
526	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
527		return (EFAULT);
528
529	/*
530	 * Check for security violations.
531	 */
532#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
533	eflags = frame.sf_sc.sc_eflags;
534	/*
535	 * XXX do allow users to change the privileged flag PSL_RF.  The
536	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
537	 * sometimes set it there too.  tf_eflags is kept in the signal
538	 * context during signal handling and there is no other place
539	 * to remember it, so the PSL_RF bit may be corrupted by the
540	 * signal handler without us knowing.  Corruption of the PSL_RF
541	 * bit at worst causes one more or one less debugger trap, so
542	 * allowing it is fairly harmless.
543	 */
544	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
545    		return(EINVAL);
546
547	/*
548	 * Don't allow users to load a valid privileged %cs.  Let the
549	 * hardware check for invalid selectors, excess privilege in
550	 * other selectors, invalid %eip's and invalid %esp's.
551	 */
552#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
553	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
554		trapsignal(p, SIGBUS, T_PROTFLT);
555		return(EINVAL);
556	}
557
558	lmask.__bits[0] = frame.sf_sc.sc_mask;
559	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
560		lmask.__bits[i+1] = frame.sf_extramask[i];
561	PROC_LOCK(p);
562	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
563	SIG_CANTMASK(p->p_sigmask);
564	signotify(p);
565	PROC_UNLOCK(p);
566
567	/*
568	 * Restore signal context.
569	 */
570	/* %gs was restored by the trampoline. */
571	regs->tf_fs     = frame.sf_sc.sc_fs;
572	regs->tf_es     = frame.sf_sc.sc_es;
573	regs->tf_ds     = frame.sf_sc.sc_ds;
574	regs->tf_edi    = frame.sf_sc.sc_edi;
575	regs->tf_esi    = frame.sf_sc.sc_esi;
576	regs->tf_ebp    = frame.sf_sc.sc_ebp;
577	regs->tf_ebx    = frame.sf_sc.sc_ebx;
578	regs->tf_edx    = frame.sf_sc.sc_edx;
579	regs->tf_ecx    = frame.sf_sc.sc_ecx;
580	regs->tf_eax    = frame.sf_sc.sc_eax;
581	regs->tf_eip    = frame.sf_sc.sc_eip;
582	regs->tf_cs     = frame.sf_sc.sc_cs;
583	regs->tf_eflags = eflags;
584	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
585	regs->tf_ss     = frame.sf_sc.sc_ss;
586
587	return (EJUSTRETURN);
588}
589
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * args->ucp is the user address of the struct l_ucontext to restore.
 *
 * Returns EFAULT if the context cannot be copied in, EINVAL if the
 * saved eflags or %cs fail the checks below, and EJUSTRETURN once the
 * register state has been restored.  The result of re-installing the
 * alternate signal stack is deliberately ignored.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t ss;
	register struct trapframe *regs;
	int eflags;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	/* All further reads use the kernel copy, never the user memory. */
	context = &uc.uc_mcontext;

	/*
	 * Check for security violations.
	 */
	/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
    		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		trapsignal(p, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* The process signal mask is replaced with the proc lock held. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
	SIG_CANTMASK(p->p_sigmask);
	signotify(p);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = context->sc_fs;
	regs->tf_es     = context->sc_es;
	regs->tf_ds     = context->sc_ds;
	regs->tf_edi    = context->sc_edi;
	regs->tf_esi    = context->sc_esi;
	regs->tf_ebp    = context->sc_ebp;
	regs->tf_ebx    = context->sc_ebx;
	regs->tf_edx    = context->sc_edx;
	regs->tf_ecx    = context->sc_ecx;
	regs->tf_eax    = context->sc_eax;
	regs->tf_eip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;

	/*
	 * call sigaltstack & ignore results..
	 */
	/* Convert the Linux stack description into a native stack_t first. */
	lss = &uc.uc_stack;
	ss.ss_sp = lss->ss_sp;
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}
699
700/*
701 * MPSAFE
702 */
703static void
704linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
705{
706	args[0] = tf->tf_ebx;
707	args[1] = tf->tf_ecx;
708	args[2] = tf->tf_edx;
709	args[3] = tf->tf_esi;
710	args[4] = tf->tf_edi;
711	args[5] = tf->tf_ebp;	/* Unconfirmed */
712	*params = NULL;		/* no copyin */
713}
714
715
716
717/*
718 * Dump core, into a file named as described in the comments for
719 * expand_name(), unless the process was setuid/setgid.
720 */
721static int
722linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
723{
724	struct proc *p = td->td_proc;
725	struct ucred *cred = td->td_ucred;
726	struct vmspace *vm = p->p_vmspace;
727	char *tempuser;
728	int error;
729
730	if (ctob((uarea_pages + kstack_pages) +
731	    vm->vm_dsize + vm->vm_ssize) >= limit)
732		return (EFAULT);
733	tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
734	    M_WAITOK | M_ZERO);
735	if (tempuser == NULL)
736		return (ENOMEM);
737	PROC_LOCK(p);
738	fill_kinfo_proc(p, &p->p_uarea->u_kproc);
739	PROC_UNLOCK(p);
740	bcopy(p->p_uarea, tempuser, sizeof(struct user));
741	bcopy(td->td_frame,
742	    tempuser + ctob(uarea_pages) +
743	    ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
744	    sizeof(struct trapframe));
745	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
746	    ctob(uarea_pages + kstack_pages),
747	    (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
748	    (int *)NULL, td);
749	free(tempuser, M_TEMP);
750	if (error == 0)
751		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
752		    (int)ctob(vm->vm_dsize),
753		    (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
754		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
755	if (error == 0)
756		error = vn_rdwr_inchunks(UIO_WRITE, vp,
757		    (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
758		    round_page(ctob(vm->vm_ssize)),
759		    (off_t)ctob(uarea_pages + kstack_pages) +
760		        ctob(vm->vm_dsize), UIO_USERSPACE,
761		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
762	return (error);
763}
764/*
765 * If a linux binary is exec'ing something, try this image activator
766 * first.  We override standard shell script execution in order to
767 * be able to modify the interpreter path.  We only do this if a linux
768 * binary is doing the exec, so we do not create an EXEC module for it.
769 */
770static int	exec_linux_imgact_try(struct image_params *iparams);
771
772static int
773exec_linux_imgact_try(struct image_params *imgp)
774{
775    const char *head = (const char *)imgp->image_header;
776    int error = -1;
777
778    /*
779     * The interpreter for shell scripts run from a linux binary needs
780     * to be located in /compat/linux if possible in order to recursively
781     * maintain linux path emulation.
782     */
783    if (((const short *)head)[0] == SHELLMAGIC) {
784	    /*
785	     * Run our normal shell image activator.  If it succeeds attempt
786	     * to use the alternate path for the interpreter.  If an alternate
787	     * path is found, use our stringspace to store it.
788	     */
789	    if ((error = exec_shell_imgact(imgp)) == 0) {
790		    char *rpath = NULL;
791
792		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
793			imgp->interpreter_name, &rpath, 0);
794		    if (rpath != imgp->interpreter_name) {
795			    int len = strlen(rpath) + 1;
796
797			    if (len <= MAXSHELLCMDLEN) {
798				    memcpy(imgp->interpreter_name, rpath, len);
799			    }
800			    free(rpath, M_TEMP);
801		    }
802	    }
803    }
804    return(error);
805}
806
/*
 * System entry vector named "Linux a.out".  It is not referenced
 * elsewhere in this file.
 *
 * The initializer is positional: the order of the values must match
 * the member order of struct sysentvec (declared outside this file,
 * presumably in <sys/sysent.h>).  It differs from elf_linux_sysvec
 * only in the fixup routine (linux_fixup), the name string, and the
 * core dump routine (linux_aout_coredump).
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux a.out",
	linux_aout_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_setregs
};
834
/*
 * System entry vector named "Linux ELF"; both ELF brands below point
 * at it.
 *
 * The initializer is positional: the order of the values must match
 * the member order of struct sysentvec (declared outside this file,
 * presumably in <sys/sysent.h>).  It differs from linux_sysvec only in
 * the fixup routine (elf_linux_fixup), the name string, and the core
 * dump routine (elf32_coredump).
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	elf_linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux ELF",
	elf32_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_setregs
};
862
863static Elf32_Brandinfo linux_brand = {
864					ELFOSABI_LINUX,
865					EM_386,
866					"Linux",
867					"/compat/linux",
868					"/lib/ld-linux.so.1",
869					&elf_linux_sysvec
870				 };
871
872static Elf32_Brandinfo linux_glibc2brand = {
873					ELFOSABI_LINUX,
874					EM_386,
875					"Linux",
876					"/compat/linux",
877					"/lib/ld-linux.so.2",
878					&elf_linux_sysvec
879				 };
880
881Elf32_Brandinfo *linux_brandlist[] = {
882					&linux_brand,
883					&linux_glibc2brand,
884					NULL
885				};
886
887static int
888linux_elf_modevent(module_t mod, int type, void *data)
889{
890	Elf32_Brandinfo **brandinfo;
891	int error;
892	struct linux_ioctl_handler **lihp;
893
894	error = 0;
895
896	switch(type) {
897	case MOD_LOAD:
898		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
899		     ++brandinfo)
900			if (elf32_insert_brand_entry(*brandinfo) < 0)
901				error = EINVAL;
902		if (error == 0) {
903			SET_FOREACH(lihp, linux_ioctl_handler_set)
904				linux_ioctl_register_handler(*lihp);
905			if (bootverbose)
906				printf("Linux ELF exec handler installed\n");
907		} else
908			printf("cannot insert Linux ELF brand handler\n");
909		break;
910	case MOD_UNLOAD:
911		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
912		     ++brandinfo)
913			if (elf32_brand_inuse(*brandinfo))
914				error = EBUSY;
915		if (error == 0) {
916			for (brandinfo = &linux_brandlist[0];
917			     *brandinfo != NULL; ++brandinfo)
918				if (elf32_remove_brand_entry(*brandinfo) < 0)
919					error = EINVAL;
920		}
921		if (error == 0) {
922			SET_FOREACH(lihp, linux_ioctl_handler_set)
923				linux_ioctl_unregister_handler(*lihp);
924			if (bootverbose)
925				printf("Linux ELF exec handler removed\n");
926		} else
927			printf("Could not deinstall ELF interpreter entry\n");
928		break;
929	default:
930		break;
931	}
932	return error;
933}
934
/*
 * Module descriptor for "linuxelf": the module name, its event handler
 * (linux_elf_modevent), and a third member that is left as 0.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Declare the module with subsystem SI_SUB_EXEC and order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
942