linux_sysvec.c revision 59368
/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 59368 2000-04-18 15:15:39Z phk $
 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/proc.h>
41#include <sys/sysent.h>
42#include <sys/imgact.h>
43#include <sys/imgact_aout.h>
44#include <sys/imgact_elf.h>
45#include <sys/signalvar.h>
46#include <sys/malloc.h>
47#include <vm/vm.h>
48#include <vm/vm_param.h>
49#include <vm/vm_page.h>
50#include <vm/vm_extern.h>
51#include <sys/exec.h>
52#include <sys/kernel.h>
53#include <sys/module.h>
54#include <machine/cpu.h>
55
56#include <i386/linux/linux.h>
57#include <i386/linux/linux_proto.h>
58
59MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
60
61extern char linux_sigcode[];
62extern int linux_szsigcode;
63
64extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
65
66extern struct linker_set linux_ioctl_handler_set;
67
68static int	linux_fixup __P((register_t **stack_base,
69				 struct image_params *iparams));
70static int	elf_linux_fixup __P((register_t **stack_base,
71				     struct image_params *iparams));
72static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
73				       u_int *code, caddr_t *params));
74static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
75				   u_long code));
76
77/*
78 * Linux syscalls return negative errno's, we do positive and map them
79 */
80static int bsd_to_linux_errno[ELAST + 1] = {
81  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
82 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
83 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
84 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
85 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
86	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
87	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
88	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
89  	-6, -6, -43, -42, -75, -6, -84
90};
91
92int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
93	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
94	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
95	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
96	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
97	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
98	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
99	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
100	0, LINUX_SIGUSR1, LINUX_SIGUSR2
101};
102
103int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
104	SIGHUP, SIGINT, SIGQUIT, SIGILL,
105	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
106	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
107	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
108	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
109	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
110	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
111	SIGIO, SIGURG, 0
112};
113
114/*
115 * If FreeBSD & Linux have a difference of opinion about what a trap
116 * means, deal with it here.
117 */
118static int
119translate_traps(int signal, int trap_code)
120{
121	if (signal != SIGBUS)
122		return signal;
123	switch (trap_code) {
124	case T_PROTFLT:
125	case T_TSSFLT:
126	case T_DOUBLEFLT:
127	case T_PAGEFLT:
128		return SIGSEGV;
129	default:
130		return signal;
131	}
132}
133
134static int
135linux_fixup(register_t **stack_base, struct image_params *imgp)
136{
137	register_t *argv, *envp;
138
139	argv = *stack_base;
140	envp = *stack_base + (imgp->argc + 1);
141	(*stack_base)--;
142	**stack_base = (intptr_t)(void *)envp;
143	(*stack_base)--;
144	**stack_base = (intptr_t)(void *)argv;
145	(*stack_base)--;
146	**stack_base = imgp->argc;
147	return 0;
148}
149
150static int
151elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
152{
153	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
154	register_t *pos;
155
156	pos = *stack_base + (imgp->argc + imgp->envc + 2);
157
158	if (args->trace) {
159		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
160	}
161	if (args->execfd != -1) {
162		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
163	}
164	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
165	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
166	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
167	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
168	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
169	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
170	AUXARGS_ENTRY(pos, AT_BASE, args->base);
171	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
172	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
173	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
174	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
175	AUXARGS_ENTRY(pos, AT_NULL, 0);
176
177	free(imgp->auxargs, M_TEMP);
178	imgp->auxargs = NULL;
179
180	(*stack_base)--;
181	**stack_base = (long)imgp->argc;
182	return 0;
183}
184
185extern int _ucodesel, _udatasel;
186
187/*
188 * Send an interrupt to process.
189 *
190 * Stack is set up to allow sigcode stored
191 * in u. to call routine, followed by kcall
192 * to sigreturn routine below.  After sigreturn
193 * resets the signal mask, the stack, and the
194 * frame pointer, it returns to the user
195 * specified pc, psl.
196 */
197
198static void
199linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
200{
201	register struct proc *p = curproc;
202	register struct trapframe *regs;
203	struct linux_sigframe *fp, frame;
204	struct sigacts *psp = p->p_sigacts;
205	int oonstack;
206
207	regs = p->p_md.md_regs;
208	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
209
210#ifdef DEBUG
211	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
212	    (long)p->p_pid, catcher, sig, (void*)mask, code);
213#endif
214	/*
215	 * Allocate space for the signal handler context.
216	 */
217	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
218	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
219		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
220		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
221		p->p_sigstk.ss_flags |= SS_ONSTACK;
222	} else {
223		fp = (struct linux_sigframe *)regs->tf_esp - 1;
224	}
225
226	/*
227	 * grow() will return FALSE if the fp will not fit inside the stack
228	 *	and the stack can not be grown. useracc will return FALSE
229	 *	if access is denied.
230	 */
231	if ((grow_stack (p, (int)fp) == FALSE) ||
232	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
233	    VM_PROT_WRITE)) {
234		/*
235		 * Process has trashed its stack; give it an illegal
236		 * instruction to halt it in its tracks.
237		 */
238		SIGACTION(p, SIGILL) = SIG_DFL;
239		SIGDELSET(p->p_sigignore, SIGILL);
240		SIGDELSET(p->p_sigcatch, SIGILL);
241		SIGDELSET(p->p_sigmask, SIGILL);
242		psignal(p, SIGILL);
243		return;
244	}
245
246	/*
247	 * Build the argument list for the signal handler.
248	 */
249	if (p->p_sysent->sv_sigtbl)
250		if (sig <= p->p_sysent->sv_sigsize)
251			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
252
253	frame.sf_handler = catcher;
254	frame.sf_sig = sig;
255
256	/*
257	 * Build the signal context to be used by sigreturn.
258	 */
259	frame.sf_sc.sc_mask   = mask->__bits[0];
260	frame.sf_sc.sc_gs     = rgs();
261	frame.sf_sc.sc_fs     = regs->tf_fs;
262	frame.sf_sc.sc_es     = regs->tf_es;
263	frame.sf_sc.sc_ds     = regs->tf_ds;
264	frame.sf_sc.sc_edi    = regs->tf_edi;
265	frame.sf_sc.sc_esi    = regs->tf_esi;
266	frame.sf_sc.sc_ebp    = regs->tf_ebp;
267	frame.sf_sc.sc_ebx    = regs->tf_ebx;
268	frame.sf_sc.sc_edx    = regs->tf_edx;
269	frame.sf_sc.sc_ecx    = regs->tf_ecx;
270	frame.sf_sc.sc_eax    = regs->tf_eax;
271	frame.sf_sc.sc_eip    = regs->tf_eip;
272	frame.sf_sc.sc_cs     = regs->tf_cs;
273	frame.sf_sc.sc_eflags = regs->tf_eflags;
274	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
275	frame.sf_sc.sc_ss     = regs->tf_ss;
276	frame.sf_sc.sc_err    = regs->tf_err;
277	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
278
279	if (copyout(&frame, fp, sizeof(frame)) != 0) {
280		/*
281		 * Process has trashed its stack; give it an illegal
282		 * instruction to halt it in its tracks.
283		 */
284		sigexit(p, SIGILL);
285		/* NOTREACHED */
286	}
287
288	/*
289	 * Build context to run handler in.
290	 */
291	regs->tf_esp = (int)fp;
292	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
293	regs->tf_eflags &= ~PSL_VM;
294	regs->tf_cs = _ucodesel;
295	regs->tf_ds = _udatasel;
296	regs->tf_es = _udatasel;
297	regs->tf_fs = _udatasel;
298	load_gs(_udatasel);
299	regs->tf_ss = _udatasel;
300}
301
302/*
303 * System call to cleanup state after a signal
304 * has been taken.  Reset signal mask and
305 * stack state from context left by sendsig (above).
306 * Return to previous pc and psl as specified by
307 * context left by sendsig. Check carefully to
308 * make sure that the user has not modified the
309 * psl to gain improper privileges or to cause
310 * a machine fault.
311 */
312int
313linux_sigreturn(p, args)
314	struct proc *p;
315	struct linux_sigreturn_args *args;
316{
317	struct linux_sigcontext *scp, context;
318	register struct trapframe *regs;
319	int eflags;
320
321	regs = p->p_md.md_regs;
322
323#ifdef DEBUG
324	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
325	    (long)p->p_pid, (void *)args->scp);
326#endif
327	/*
328	 * The trampoline code hands us the context.
329	 * It is unsafe to keep track of it ourselves, in the event that a
330	 * program jumps out of a signal handler.
331	 */
332	scp = SCARG(args,scp);
333	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
334		return (EFAULT);
335
336	/*
337	 * Check for security violations.
338	 */
339#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
340	eflags = context.sc_eflags;
341	/*
342	 * XXX do allow users to change the privileged flag PSL_RF.  The
343	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
344	 * sometimes set it there too.  tf_eflags is kept in the signal
345	 * context during signal handling and there is no other place
346	 * to remember it, so the PSL_RF bit may be corrupted by the
347	 * signal handler without us knowing.  Corruption of the PSL_RF
348	 * bit at worst causes one more or one less debugger trap, so
349	 * allowing it is fairly harmless.
350	 */
351	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
352    		return(EINVAL);
353	}
354
355	/*
356	 * Don't allow users to load a valid privileged %cs.  Let the
357	 * hardware check for invalid selectors, excess privilege in
358	 * other selectors, invalid %eip's and invalid %esp's.
359	 */
360#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
361	if (!CS_SECURE(context.sc_cs)) {
362		trapsignal(p, SIGBUS, T_PROTFLT);
363		return(EINVAL);
364	}
365
366	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
367	SIGSETOLD(p->p_sigmask, context.sc_mask);
368	SIG_CANTMASK(p->p_sigmask);
369
370	/*
371	 * Restore signal context.
372	 */
373	/* %gs was restored by the trampoline. */
374	regs->tf_fs     = context.sc_fs;
375	regs->tf_es     = context.sc_es;
376	regs->tf_ds     = context.sc_ds;
377	regs->tf_edi    = context.sc_edi;
378	regs->tf_esi    = context.sc_esi;
379	regs->tf_ebp    = context.sc_ebp;
380	regs->tf_ebx    = context.sc_ebx;
381	regs->tf_edx    = context.sc_edx;
382	regs->tf_ecx    = context.sc_ecx;
383	regs->tf_eax    = context.sc_eax;
384	regs->tf_eip    = context.sc_eip;
385	regs->tf_cs     = context.sc_cs;
386	regs->tf_eflags = eflags;
387	regs->tf_esp    = context.sc_esp_at_signal;
388	regs->tf_ss     = context.sc_ss;
389
390	return (EJUSTRETURN);
391}
392
393static void
394linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
395{
396	args[0] = tf->tf_ebx;
397	args[1] = tf->tf_ecx;
398	args[2] = tf->tf_edx;
399	args[3] = tf->tf_esi;
400	args[4] = tf->tf_edi;
401	*params = NULL;		/* no copyin */
402}
403
404struct sysentvec linux_sysvec = {
405	LINUX_SYS_MAXSYSCALL,
406	linux_sysent,
407	0xff,
408	LINUX_SIGTBLSZ,
409	bsd_to_linux_signal,
410	ELAST + 1,
411	bsd_to_linux_errno,
412	translate_traps,
413	linux_fixup,
414	linux_sendsig,
415	linux_sigcode,
416	&linux_szsigcode,
417	linux_prepsyscall,
418	"Linux a.out",
419	aout_coredump
420};
421
422struct sysentvec elf_linux_sysvec = {
423	LINUX_SYS_MAXSYSCALL,
424	linux_sysent,
425	0xff,
426	LINUX_SIGTBLSZ,
427	bsd_to_linux_signal,
428	ELAST + 1,
429	bsd_to_linux_errno,
430	translate_traps,
431	elf_linux_fixup,
432	linux_sendsig,
433	linux_sigcode,
434	&linux_szsigcode,
435	linux_prepsyscall,
436	"Linux ELF",
437	elf_coredump
438};
439
440static Elf32_Brandinfo linux_brand = {
441					ELFOSABI_LINUX,
442					"/compat/linux",
443					"/lib/ld-linux.so.1",
444					&elf_linux_sysvec
445				 };
446
447static Elf32_Brandinfo linux_glibc2brand = {
448					ELFOSABI_LINUX,
449					"/compat/linux",
450					"/lib/ld-linux.so.2",
451					&elf_linux_sysvec
452				 };
453
454Elf32_Brandinfo *linux_brandlist[] = {
455					&linux_brand,
456					&linux_glibc2brand,
457					NULL
458				};
459
460static int
461linux_elf_modevent(module_t mod, int type, void *data)
462{
463	Elf32_Brandinfo **brandinfo;
464	int error;
465
466	error = 0;
467
468	switch(type) {
469	case MOD_LOAD:
470		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
471		     ++brandinfo)
472			if (elf_insert_brand_entry(*brandinfo) < 0)
473				error = EINVAL;
474		if (error)
475			printf("cannot insert Linux elf brand handler\n");
476		else {
477			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
478			if (bootverbose)
479				printf("Linux-ELF exec handler installed\n");
480		}
481		break;
482	case MOD_UNLOAD:
483		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
484		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
485		     ++brandinfo)
486			if (elf_brand_inuse(*brandinfo))
487				error = EBUSY;
488
489		if (error == 0) {
490			for (brandinfo = &linux_brandlist[0];
491			     *brandinfo != NULL; ++brandinfo)
492				if (elf_remove_brand_entry(*brandinfo) < 0)
493					error = EINVAL;
494		}
495		if (error)
496			printf("Could not deinstall ELF interpreter entry\n");
497		else if (bootverbose)
498			printf("Linux-elf exec handler removed\n");
499		break;
500	default:
501		break;
502	}
503	return error;
504}
505static moduledata_t linux_elf_mod = {
506	"linuxelf",
507	linux_elf_modevent,
508	0
509};
510DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
511