linux_sysvec.c revision 54122
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 54122 1999-12-04 11:10:22Z marcel $
29 */
30
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/buf.h>
41#include <sys/proc.h>
42#include <sys/sysent.h>
43#include <sys/imgact.h>
44#include <sys/imgact_aout.h>
45#include <sys/imgact_elf.h>
46#include <sys/signalvar.h>
47#include <sys/malloc.h>
48#include <vm/vm.h>
49#include <vm/vm_param.h>
50#include <vm/vm_page.h>
51#include <vm/vm_extern.h>
52#include <sys/exec.h>
53#include <sys/kernel.h>
54#include <sys/module.h>
55#include <machine/cpu.h>
56
57#include <i386/linux/linux.h>
58#include <i386/linux/linux_proto.h>
59
60MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
61
62extern char linux_sigcode[];
63extern int linux_szsigcode;
64
65extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
66
67extern struct linker_set linux_ioctl_handler_set;
68
69static int	linux_fixup __P((long **stack_base,
70				 struct image_params *iparams));
71static int	elf_linux_fixup __P((long **stack_base,
72				     struct image_params *iparams));
73static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
74				       u_int *code, caddr_t *params));
75static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
76				   u_long code));
77
78/*
79 * Linux syscalls return negative errno's, we do positive and map them
80 */
81static int bsd_to_linux_errno[ELAST + 1] = {
82  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
83 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
84 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
85 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
86 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
87	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
88	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
89	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
90  	-6, -6, -43, -42, -75, -6, -84
91};
92
93int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
94	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
95	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
96	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
97	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
98	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
99	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
100	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
101	0, LINUX_SIGUSR1, LINUX_SIGUSR2
102};
103
104int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
105	SIGHUP, SIGINT, SIGQUIT, SIGILL,
106	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
107	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
108	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
109	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
110	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
111	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
112	SIGIO, SIGURG, 0
113};
114
115/*
116 * If FreeBSD & Linux have a difference of opinion about what a trap
117 * means, deal with it here.
118 */
119static int
120translate_traps(int signal, int trap_code)
121{
122	if (signal != SIGBUS)
123		return signal;
124	switch (trap_code) {
125	case T_PROTFLT:
126	case T_TSSFLT:
127	case T_DOUBLEFLT:
128	case T_PAGEFLT:
129		return SIGSEGV;
130	default:
131		return signal;
132	}
133}
134
135static int
136linux_fixup(long **stack_base, struct image_params *imgp)
137{
138	long *argv, *envp;
139
140	argv = *stack_base;
141	envp = *stack_base + (imgp->argc + 1);
142	(*stack_base)--;
143	**stack_base = (intptr_t)(void *)envp;
144	(*stack_base)--;
145	**stack_base = (intptr_t)(void *)argv;
146	(*stack_base)--;
147	**stack_base = imgp->argc;
148	return 0;
149}
150
151static int
152elf_linux_fixup(long **stack_base, struct image_params *imgp)
153{
154	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
155	long *pos;
156
157	pos = *stack_base + (imgp->argc + imgp->envc + 2);
158
159	if (args->trace) {
160		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
161	}
162	if (args->execfd != -1) {
163		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
164	}
165	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
166	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
167	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
168	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
169	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
170	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
171	AUXARGS_ENTRY(pos, AT_BASE, args->base);
172	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
173	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
174	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
175	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
176	AUXARGS_ENTRY(pos, AT_NULL, 0);
177
178	free(imgp->auxargs, M_TEMP);
179	imgp->auxargs = NULL;
180
181	(*stack_base)--;
182	**stack_base = (long)imgp->argc;
183	return 0;
184}
185
186extern int _ucodesel, _udatasel;
187
188/*
189 * Send an interrupt to process.
190 *
191 * Stack is set up to allow sigcode stored
192 * in u. to call routine, followed by kcall
193 * to sigreturn routine below.  After sigreturn
194 * resets the signal mask, the stack, and the
195 * frame pointer, it returns to the user
196 * specified pc, psl.
197 */
198
199static void
200linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
201{
202	register struct proc *p = curproc;
203	register struct trapframe *regs;
204	struct linux_sigframe *fp, frame;
205	struct sigacts *psp = p->p_sigacts;
206	int oonstack;
207
208	regs = p->p_md.md_regs;
209	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
210
211#ifdef DEBUG
212	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
213	    (long)p->p_pid, catcher, sig, (void*)mask, code);
214#endif
215	/*
216	 * Allocate space for the signal handler context.
217	 */
218	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
219	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
220		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
221		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
222		p->p_sigstk.ss_flags |= SS_ONSTACK;
223	} else {
224		fp = (struct linux_sigframe *)regs->tf_esp - 1;
225	}
226
227	/*
228	 * grow() will return FALSE if the fp will not fit inside the stack
229	 *	and the stack can not be grown. useracc will return FALSE
230	 *	if access is denied.
231	 */
232	if ((grow_stack (p, (int)fp) == FALSE) ||
233	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
234	    VM_PROT_WRITE)) {
235		/*
236		 * Process has trashed its stack; give it an illegal
237		 * instruction to halt it in its tracks.
238		 */
239		SIGACTION(p, SIGILL) = SIG_DFL;
240		SIGDELSET(p->p_sigignore, SIGILL);
241		SIGDELSET(p->p_sigcatch, SIGILL);
242		SIGDELSET(p->p_sigmask, SIGILL);
243		psignal(p, SIGILL);
244		return;
245	}
246
247	/*
248	 * Build the argument list for the signal handler.
249	 */
250	if (p->p_sysent->sv_sigtbl)
251		if (sig <= p->p_sysent->sv_sigsize)
252			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
253
254	frame.sf_handler = catcher;
255	frame.sf_sig = sig;
256
257	/*
258	 * Build the signal context to be used by sigreturn.
259	 */
260	frame.sf_sc.sc_mask   = mask->__bits[0];
261	frame.sf_sc.sc_gs     = rgs();
262	frame.sf_sc.sc_fs     = regs->tf_fs;
263	frame.sf_sc.sc_es     = regs->tf_es;
264	frame.sf_sc.sc_ds     = regs->tf_ds;
265	frame.sf_sc.sc_edi    = regs->tf_edi;
266	frame.sf_sc.sc_esi    = regs->tf_esi;
267	frame.sf_sc.sc_ebp    = regs->tf_ebp;
268	frame.sf_sc.sc_ebx    = regs->tf_ebx;
269	frame.sf_sc.sc_edx    = regs->tf_edx;
270	frame.sf_sc.sc_ecx    = regs->tf_ecx;
271	frame.sf_sc.sc_eax    = regs->tf_eax;
272	frame.sf_sc.sc_eip    = regs->tf_eip;
273	frame.sf_sc.sc_cs     = regs->tf_cs;
274	frame.sf_sc.sc_eflags = regs->tf_eflags;
275	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
276	frame.sf_sc.sc_ss     = regs->tf_ss;
277	frame.sf_sc.sc_err    = regs->tf_err;
278	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
279
280	if (copyout(&frame, fp, sizeof(frame)) != 0) {
281		/*
282		 * Process has trashed its stack; give it an illegal
283		 * instruction to halt it in its tracks.
284		 */
285		sigexit(p, SIGILL);
286		/* NOTREACHED */
287	}
288
289	/*
290	 * Build context to run handler in.
291	 */
292	regs->tf_esp = (int)fp;
293	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
294	regs->tf_eflags &= ~PSL_VM;
295	regs->tf_cs = _ucodesel;
296	regs->tf_ds = _udatasel;
297	regs->tf_es = _udatasel;
298	regs->tf_fs = _udatasel;
299	load_gs(_udatasel);
300	regs->tf_ss = _udatasel;
301}
302
303/*
304 * System call to cleanup state after a signal
305 * has been taken.  Reset signal mask and
306 * stack state from context left by sendsig (above).
307 * Return to previous pc and psl as specified by
308 * context left by sendsig. Check carefully to
309 * make sure that the user has not modified the
310 * psl to gain improper privileges or to cause
311 * a machine fault.
312 */
313int
314linux_sigreturn(p, args)
315	struct proc *p;
316	struct linux_sigreturn_args *args;
317{
318	struct linux_sigcontext *scp, context;
319	register struct trapframe *regs;
320	int eflags;
321
322	regs = p->p_md.md_regs;
323
324#ifdef DEBUG
325	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
326	    (long)p->p_pid, (void *)args->scp);
327#endif
328	/*
329	 * The trampoline code hands us the context.
330	 * It is unsafe to keep track of it ourselves, in the event that a
331	 * program jumps out of a signal handler.
332	 */
333	scp = SCARG(args,scp);
334	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
335		return (EFAULT);
336
337	/*
338	 * Check for security violations.
339	 */
340#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
341	eflags = context.sc_eflags;
342	/*
343	 * XXX do allow users to change the privileged flag PSL_RF.  The
344	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
345	 * sometimes set it there too.  tf_eflags is kept in the signal
346	 * context during signal handling and there is no other place
347	 * to remember it, so the PSL_RF bit may be corrupted by the
348	 * signal handler without us knowing.  Corruption of the PSL_RF
349	 * bit at worst causes one more or one less debugger trap, so
350	 * allowing it is fairly harmless.
351	 */
352	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
353    		return(EINVAL);
354	}
355
356	/*
357	 * Don't allow users to load a valid privileged %cs.  Let the
358	 * hardware check for invalid selectors, excess privilege in
359	 * other selectors, invalid %eip's and invalid %esp's.
360	 */
361#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
362	if (!CS_SECURE(context.sc_cs)) {
363		trapsignal(p, SIGBUS, T_PROTFLT);
364		return(EINVAL);
365	}
366
367	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
368	SIGSETOLD(p->p_sigmask, context.sc_mask);
369	SIG_CANTMASK(p->p_sigmask);
370
371	/*
372	 * Restore signal context.
373	 */
374	/* %gs was restored by the trampoline. */
375	regs->tf_fs     = context.sc_fs;
376	regs->tf_es     = context.sc_es;
377	regs->tf_ds     = context.sc_ds;
378	regs->tf_edi    = context.sc_edi;
379	regs->tf_esi    = context.sc_esi;
380	regs->tf_ebp    = context.sc_ebp;
381	regs->tf_ebx    = context.sc_ebx;
382	regs->tf_edx    = context.sc_edx;
383	regs->tf_ecx    = context.sc_ecx;
384	regs->tf_eax    = context.sc_eax;
385	regs->tf_eip    = context.sc_eip;
386	regs->tf_cs     = context.sc_cs;
387	regs->tf_eflags = eflags;
388	regs->tf_esp    = context.sc_esp_at_signal;
389	regs->tf_ss     = context.sc_ss;
390
391	return (EJUSTRETURN);
392}
393
394static void
395linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
396{
397	args[0] = tf->tf_ebx;
398	args[1] = tf->tf_ecx;
399	args[2] = tf->tf_edx;
400	args[3] = tf->tf_esi;
401	args[4] = tf->tf_edi;
402	*params = NULL;		/* no copyin */
403}
404
405struct sysentvec linux_sysvec = {
406	LINUX_SYS_MAXSYSCALL,
407	linux_sysent,
408	0xff,
409	LINUX_SIGTBLSZ,
410	bsd_to_linux_signal,
411	ELAST + 1,
412	bsd_to_linux_errno,
413	translate_traps,
414	linux_fixup,
415	linux_sendsig,
416	linux_sigcode,
417	&linux_szsigcode,
418	linux_prepsyscall,
419	"Linux a.out",
420	aout_coredump
421};
422
423struct sysentvec elf_linux_sysvec = {
424	LINUX_SYS_MAXSYSCALL,
425	linux_sysent,
426	0xff,
427	LINUX_SIGTBLSZ,
428	bsd_to_linux_signal,
429	ELAST + 1,
430	bsd_to_linux_errno,
431	translate_traps,
432	elf_linux_fixup,
433	linux_sendsig,
434	linux_sigcode,
435	&linux_szsigcode,
436	linux_prepsyscall,
437	"Linux ELF",
438	elf_coredump
439};
440
441static Elf32_Brandinfo linux_brand = {
442					"Linux",
443					"/compat/linux",
444					"/lib/ld-linux.so.1",
445					&elf_linux_sysvec
446				 };
447
448static Elf32_Brandinfo linux_glibc2brand = {
449					"Linux",
450					"/compat/linux",
451					"/lib/ld-linux.so.2",
452					&elf_linux_sysvec
453				 };
454
455Elf32_Brandinfo *linux_brandlist[] = {
456					&linux_brand,
457					&linux_glibc2brand,
458					NULL
459				};
460
461static int
462linux_elf_modevent(module_t mod, int type, void *data)
463{
464	Elf32_Brandinfo **brandinfo;
465	int error;
466
467	error = 0;
468
469	switch(type) {
470	case MOD_LOAD:
471		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
472		     ++brandinfo)
473			if (elf_insert_brand_entry(*brandinfo) < 0)
474				error = EINVAL;
475		if (error)
476			printf("cannot insert Linux elf brand handler\n");
477		else {
478			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
479			if (bootverbose)
480				printf("Linux-ELF exec handler installed\n");
481		}
482		break;
483	case MOD_UNLOAD:
484		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
485		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
486		     ++brandinfo)
487			if (elf_brand_inuse(*brandinfo))
488				error = EBUSY;
489
490		if (error == 0) {
491			for (brandinfo = &linux_brandlist[0];
492			     *brandinfo != NULL; ++brandinfo)
493				if (elf_remove_brand_entry(*brandinfo) < 0)
494					error = EINVAL;
495		}
496		if (error)
497			printf("Could not deinstall ELF interpreter entry\n");
498		else if (bootverbose)
499			printf("Linux-elf exec handler removed\n");
500		break;
501	default:
502		break;
503	}
504	return error;
505}
506static moduledata_t linux_elf_mod = {
507	"linuxelf",
508	linux_elf_modevent,
509	0
510};
511DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
512