Deleted Added
full compact
linux_sysvec.c (114983) linux_sysvec.c (115705)
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 114983 2003-05-13 20:36:02Z jhb $
29 */
30
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 115705 2003-06-02 16:56:40Z obrien $");
31
31/* XXX we use functions that might not exist. */
32#include "opt_compat.h"
33
34#ifndef COMPAT_43
35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/imgact.h>
41#include <sys/imgact_aout.h>
42#include <sys/imgact_elf.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/signalvar.h>
48#include <sys/syscallsubr.h>
49#include <sys/sysent.h>
50#include <sys/sysproto.h>
51#include <sys/user.h>
52#include <sys/vnode.h>
53
54#include <vm/vm.h>
55#include <vm/vm_param.h>
56#include <vm/vm_page.h>
57#include <vm/vm_extern.h>
58#include <sys/exec.h>
59#include <sys/kernel.h>
60#include <sys/module.h>
61#include <machine/cpu.h>
62#include <machine/md_var.h>
63#include <sys/mutex.h>
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/pmap.h>
68#include <vm/vm_map.h>
69#include <vm/vm_object.h>
70
71#include <i386/linux/linux.h>
72#include <i386/linux/linux_proto.h>
73#include <compat/linux/linux_mib.h>
74#include <compat/linux/linux_signal.h>
75#include <compat/linux/linux_util.h>
76
77MODULE_VERSION(linux, 1);
78MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
79MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
80MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
81
82MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
83
84#if BYTE_ORDER == LITTLE_ENDIAN
85#define SHELLMAGIC 0x2123 /* #! */
86#else
87#define SHELLMAGIC 0x2321
88#endif
89
90/*
91 * Allow the sendsig functions to use the ldebug() facility
92 * even though they are not syscalls themselves. Map them
93 * to syscall 0. This is slightly less bogus than using
94 * ldebug(sigreturn).
95 */
96#define LINUX_SYS_linux_rt_sendsig 0
97#define LINUX_SYS_linux_sendsig 0
98
99extern char linux_sigcode[];
100extern int linux_szsigcode;
101
102extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
103
104SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
105
106static int linux_fixup(register_t **stack_base,
107 struct image_params *iparams);
108static int elf_linux_fixup(register_t **stack_base,
109 struct image_params *iparams);
110static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
111 caddr_t *params);
112static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
113 u_long code);
114static void exec_linux_setregs(struct thread *td, u_long entry,
115 u_long stack, u_long ps_strings);
116
117/*
118 * Linux syscalls return negative errno's, we do positive and map them
119 */
120static int bsd_to_linux_errno[ELAST + 1] = {
121 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
122 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
123 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
124 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
125 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
126 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
127 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
128 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
129 -6, -6, -43, -42, -75, -6, -84
130};
131
132int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
133 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
134 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
135 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
136 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
137 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
138 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
139 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
140 0, LINUX_SIGUSR1, LINUX_SIGUSR2
141};
142
143int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
144 SIGHUP, SIGINT, SIGQUIT, SIGILL,
145 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
146 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
147 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
148 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
149 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
150 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
151 SIGIO, SIGURG, SIGSYS
152};
153
/* Value reported to Linux when a native trap code has no mapping. */
#define	LINUX_T_UNKNOWN		255

/*
 * Native trap code -> Linux trap number, indexed by the native code.
 * The result is stored in the sc_trapno member of the signal context
 * built by the sendsig routines.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0 */
	6,			/*  1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2 */
	3,			/*  3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4 */
	LINUX_T_UNKNOWN,	/*  5 */
	16,			/*  6  T_ARITHTRAP */
	254,			/*  7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8 */
	13,			/*  9  T_PROTFLT */
	1,			/* 10  T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12  T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14  T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18  T_DIVIDE */
	2,			/* 19  T_NMI */
	4,			/* 20  T_OFLOW */
	5,			/* 21  T_BOUND */
	7,			/* 22  T_DNA */
	8,			/* 23  T_DOUBLEFLT */
	9,			/* 24  T_FPOPFLT */
	10,			/* 25  T_TSSFLT */
	11,			/* 26  T_SEGNPFLT */
	12,			/* 27  T_STKFLT */
	18,			/* 28  T_MCHK */
	19,			/* 29  T_XMMFLT */
	15			/* 30  T_RESERVED */
};

/*
 * Bounds-checked lookup: codes beyond the end of the table yield
 * LINUX_T_UNKNOWN.  Note that "code" is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(*_bsd_to_linux_trapcode) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
192
193/*
194 * If FreeBSD & Linux have a difference of opinion about what a trap
195 * means, deal with it here.
196 *
197 * MPSAFE
198 */
199static int
200translate_traps(int signal, int trap_code)
201{
202 if (signal != SIGBUS)
203 return signal;
204 switch (trap_code) {
205 case T_PROTFLT:
206 case T_TSSFLT:
207 case T_DOUBLEFLT:
208 case T_PAGEFLT:
209 return SIGSEGV;
210 default:
211 return signal;
212 }
213}
214
215static int
216linux_fixup(register_t **stack_base, struct image_params *imgp)
217{
218 register_t *argv, *envp;
219
220 argv = *stack_base;
221 envp = *stack_base + (imgp->argc + 1);
222 (*stack_base)--;
223 **stack_base = (intptr_t)(void *)envp;
224 (*stack_base)--;
225 **stack_base = (intptr_t)(void *)argv;
226 (*stack_base)--;
227 **stack_base = imgp->argc;
228 return 0;
229}
230
231static int
232elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
233{
234 Elf32_Auxargs *args;
235 register_t *pos;
236
237 KASSERT(curthread->td_proc == imgp->proc &&
238 (curthread->td_proc->p_flag & P_THREADED) == 0,
239 ("unsafe elf_linux_fixup(), should be curproc"));
240 args = (Elf32_Auxargs *)imgp->auxargs;
241 pos = *stack_base + (imgp->argc + imgp->envc + 2);
242
243 if (args->trace)
244 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
245 if (args->execfd != -1)
246 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
247 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
248 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
249 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
250 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
251 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
252 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
253 AUXARGS_ENTRY(pos, AT_BASE, args->base);
254 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
255 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
256 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
257 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
258 AUXARGS_ENTRY(pos, AT_NULL, 0);
259
260 free(imgp->auxargs, M_TEMP);
261 imgp->auxargs = NULL;
262
263 (*stack_base)--;
264 **stack_base = (register_t)imgp->argc;
265 return 0;
266}
267
268extern int _ucodesel, _udatasel;
269extern unsigned long linux_sznonrtsigcode;
270
271static void
272linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
273{
274 struct thread *td = curthread;
275 struct proc *p = td->td_proc;
276 struct sigacts *psp;
277 struct trapframe *regs;
278 struct l_rt_sigframe *fp, frame;
279 int oonstack;
280
281 PROC_LOCK_ASSERT(p, MA_OWNED);
282 psp = p->p_sigacts;
283 mtx_assert(&psp->ps_mtx, MA_OWNED);
284 regs = td->td_frame;
285 oonstack = sigonstack(regs->tf_esp);
286
287#ifdef DEBUG
288 if (ldebug(rt_sendsig))
289 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
290 catcher, sig, (void*)mask, code);
291#endif
292 /*
293 * Allocate space for the signal handler context.
294 */
295 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
296 SIGISMEMBER(psp->ps_sigonstack, sig)) {
297 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
298 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
299 } else
300 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
301 mtx_unlock(&psp->ps_mtx);
302
303 /*
304 * Build the argument list for the signal handler.
305 */
306 if (p->p_sysent->sv_sigtbl)
307 if (sig <= p->p_sysent->sv_sigsize)
308 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
309
310 bzero(&frame, sizeof(frame));
311
312 frame.sf_handler = catcher;
313 frame.sf_sig = sig;
314 frame.sf_siginfo = &fp->sf_si;
315 frame.sf_ucontext = &fp->sf_sc;
316
317 /* Fill in POSIX parts */
318 frame.sf_si.lsi_signo = sig;
319 frame.sf_si.lsi_code = code;
320 frame.sf_si.lsi_addr = (void *)regs->tf_err;
321
322 /*
323 * Build the signal context to be used by sigreturn.
324 */
325 frame.sf_sc.uc_flags = 0; /* XXX ??? */
326 frame.sf_sc.uc_link = NULL; /* XXX ??? */
327
328 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
329 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
330 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
331 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
332 PROC_UNLOCK(p);
333
334 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
335
336 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
337 frame.sf_sc.uc_mcontext.sc_gs = rgs();
338 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
339 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
340 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
341 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
342 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
343 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
344 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
345 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
346 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
347 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
348 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
349 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
350 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
351 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
352 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
353 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
354 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
355
356#ifdef DEBUG
357 if (ldebug(rt_sendsig))
358 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
359 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
360 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
361#endif
362
363 if (copyout(&frame, fp, sizeof(frame)) != 0) {
364 /*
365 * Process has trashed its stack; give it an illegal
366 * instruction to halt it in its tracks.
367 */
368#ifdef DEBUG
369 if (ldebug(rt_sendsig))
370 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
371 fp, oonstack);
372#endif
373 PROC_LOCK(p);
374 sigexit(td, SIGILL);
375 }
376
377 /*
378 * Build context to run handler in.
379 */
380 regs->tf_esp = (int)fp;
381 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
382 linux_sznonrtsigcode;
383 regs->tf_eflags &= ~(PSL_T | PSL_VM);
384 regs->tf_cs = _ucodesel;
385 regs->tf_ds = _udatasel;
386 regs->tf_es = _udatasel;
387 regs->tf_fs = _udatasel;
388 regs->tf_ss = _udatasel;
389 PROC_LOCK(p);
390 mtx_lock(&psp->ps_mtx);
391}
392
393
394/*
395 * Send an interrupt to process.
396 *
397 * Stack is set up to allow sigcode stored
398 * in u. to call routine, followed by kcall
399 * to sigreturn routine below. After sigreturn
400 * resets the signal mask, the stack, and the
401 * frame pointer, it returns to the user
402 * specified pc, psl.
403 */
404static void
405linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
406{
407 struct thread *td = curthread;
408 struct proc *p = td->td_proc;
409 struct sigacts *psp;
410 struct trapframe *regs;
411 struct l_sigframe *fp, frame;
412 l_sigset_t lmask;
413 int oonstack, i;
414
415 PROC_LOCK_ASSERT(p, MA_OWNED);
416 psp = p->p_sigacts;
417 mtx_assert(&psp->ps_mtx, MA_OWNED);
418 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
419 /* Signal handler installed with SA_SIGINFO. */
420 linux_rt_sendsig(catcher, sig, mask, code);
421 return;
422 }
423
424 regs = td->td_frame;
425 oonstack = sigonstack(regs->tf_esp);
426
427#ifdef DEBUG
428 if (ldebug(sendsig))
429 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
430 catcher, sig, (void*)mask, code);
431#endif
432
433 /*
434 * Allocate space for the signal handler context.
435 */
436 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
437 SIGISMEMBER(psp->ps_sigonstack, sig)) {
438 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
439 p->p_sigstk.ss_size - sizeof(struct l_sigframe));
440 } else
441 fp = (struct l_sigframe *)regs->tf_esp - 1;
442 mtx_unlock(&psp->ps_mtx);
443 PROC_UNLOCK(p);
444
445 /*
446 * Build the argument list for the signal handler.
447 */
448 if (p->p_sysent->sv_sigtbl)
449 if (sig <= p->p_sysent->sv_sigsize)
450 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
451
452 bzero(&frame, sizeof(frame));
453
454 frame.sf_handler = catcher;
455 frame.sf_sig = sig;
456
457 bsd_to_linux_sigset(mask, &lmask);
458
459 /*
460 * Build the signal context to be used by sigreturn.
461 */
462 frame.sf_sc.sc_mask = lmask.__bits[0];
463 frame.sf_sc.sc_gs = rgs();
464 frame.sf_sc.sc_fs = regs->tf_fs;
465 frame.sf_sc.sc_es = regs->tf_es;
466 frame.sf_sc.sc_ds = regs->tf_ds;
467 frame.sf_sc.sc_edi = regs->tf_edi;
468 frame.sf_sc.sc_esi = regs->tf_esi;
469 frame.sf_sc.sc_ebp = regs->tf_ebp;
470 frame.sf_sc.sc_ebx = regs->tf_ebx;
471 frame.sf_sc.sc_edx = regs->tf_edx;
472 frame.sf_sc.sc_ecx = regs->tf_ecx;
473 frame.sf_sc.sc_eax = regs->tf_eax;
474 frame.sf_sc.sc_eip = regs->tf_eip;
475 frame.sf_sc.sc_cs = regs->tf_cs;
476 frame.sf_sc.sc_eflags = regs->tf_eflags;
477 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
478 frame.sf_sc.sc_ss = regs->tf_ss;
479 frame.sf_sc.sc_err = regs->tf_err;
480 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
481
482 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
483 frame.sf_extramask[i] = lmask.__bits[i+1];
484
485 if (copyout(&frame, fp, sizeof(frame)) != 0) {
486 /*
487 * Process has trashed its stack; give it an illegal
488 * instruction to halt it in its tracks.
489 */
490 PROC_LOCK(p);
491 sigexit(td, SIGILL);
492 }
493
494 /*
495 * Build context to run handler in.
496 */
497 regs->tf_esp = (int)fp;
498 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
499 regs->tf_eflags &= ~(PSL_T | PSL_VM);
500 regs->tf_cs = _ucodesel;
501 regs->tf_ds = _udatasel;
502 regs->tf_es = _udatasel;
503 regs->tf_fs = _udatasel;
504 regs->tf_ss = _udatasel;
505 PROC_LOCK(p);
506 mtx_lock(&psp->ps_mtx);
507}
508
509/*
510 * System call to cleanup state after a signal
511 * has been taken. Reset signal mask and
512 * stack state from context left by sendsig (above).
513 * Return to previous pc and psl as specified by
514 * context left by sendsig. Check carefully to
515 * make sure that the user has not modified the
516 * psl to gain improper privileges or to cause
517 * a machine fault.
518 */
519int
520linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
521{
522 struct proc *p = td->td_proc;
523 struct l_sigframe frame;
524 struct trapframe *regs;
525 l_sigset_t lmask;
526 int eflags, i;
527
528 regs = td->td_frame;
529
530#ifdef DEBUG
531 if (ldebug(sigreturn))
532 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
533#endif
534 /*
535 * The trampoline code hands us the sigframe.
536 * It is unsafe to keep track of it ourselves, in the event that a
537 * program jumps out of a signal handler.
538 */
539 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
540 return (EFAULT);
541
542 /*
543 * Check for security violations.
544 */
545#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
546 eflags = frame.sf_sc.sc_eflags;
547 /*
548 * XXX do allow users to change the privileged flag PSL_RF. The
549 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
550 * sometimes set it there too. tf_eflags is kept in the signal
551 * context during signal handling and there is no other place
552 * to remember it, so the PSL_RF bit may be corrupted by the
553 * signal handler without us knowing. Corruption of the PSL_RF
554 * bit at worst causes one more or one less debugger trap, so
555 * allowing it is fairly harmless.
556 */
557 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
558 return(EINVAL);
559
560 /*
561 * Don't allow users to load a valid privileged %cs. Let the
562 * hardware check for invalid selectors, excess privilege in
563 * other selectors, invalid %eip's and invalid %esp's.
564 */
565#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
566 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
567 trapsignal(td, SIGBUS, T_PROTFLT);
568 return(EINVAL);
569 }
570
571 lmask.__bits[0] = frame.sf_sc.sc_mask;
572 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
573 lmask.__bits[i+1] = frame.sf_extramask[i];
574 PROC_LOCK(p);
575 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
576 SIG_CANTMASK(td->td_sigmask);
577 signotify(td);
578 PROC_UNLOCK(p);
579
580 /*
581 * Restore signal context.
582 */
583 /* %gs was restored by the trampoline. */
584 regs->tf_fs = frame.sf_sc.sc_fs;
585 regs->tf_es = frame.sf_sc.sc_es;
586 regs->tf_ds = frame.sf_sc.sc_ds;
587 regs->tf_edi = frame.sf_sc.sc_edi;
588 regs->tf_esi = frame.sf_sc.sc_esi;
589 regs->tf_ebp = frame.sf_sc.sc_ebp;
590 regs->tf_ebx = frame.sf_sc.sc_ebx;
591 regs->tf_edx = frame.sf_sc.sc_edx;
592 regs->tf_ecx = frame.sf_sc.sc_ecx;
593 regs->tf_eax = frame.sf_sc.sc_eax;
594 regs->tf_eip = frame.sf_sc.sc_eip;
595 regs->tf_cs = frame.sf_sc.sc_cs;
596 regs->tf_eflags = eflags;
597 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
598 regs->tf_ss = frame.sf_sc.sc_ss;
599
600 return (EJUSTRETURN);
601}
602
603/*
604 * System call to cleanup state after a signal
605 * has been taken. Reset signal mask and
606 * stack state from context left by rt_sendsig (above).
607 * Return to previous pc and psl as specified by
608 * context left by sendsig. Check carefully to
609 * make sure that the user has not modified the
610 * psl to gain improper privileges or to cause
611 * a machine fault.
612 */
613int
614linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
615{
616 struct proc *p = td->td_proc;
617 struct l_ucontext uc;
618 struct l_sigcontext *context;
619 l_stack_t *lss;
620 stack_t ss;
621 struct trapframe *regs;
622 int eflags;
623
624 regs = td->td_frame;
625
626#ifdef DEBUG
627 if (ldebug(rt_sigreturn))
628 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
629#endif
630 /*
631 * The trampoline code hands us the ucontext.
632 * It is unsafe to keep track of it ourselves, in the event that a
633 * program jumps out of a signal handler.
634 */
635 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
636 return (EFAULT);
637
638 context = &uc.uc_mcontext;
639
640 /*
641 * Check for security violations.
642 */
643#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
644 eflags = context->sc_eflags;
645 /*
646 * XXX do allow users to change the privileged flag PSL_RF. The
647 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
648 * sometimes set it there too. tf_eflags is kept in the signal
649 * context during signal handling and there is no other place
650 * to remember it, so the PSL_RF bit may be corrupted by the
651 * signal handler without us knowing. Corruption of the PSL_RF
652 * bit at worst causes one more or one less debugger trap, so
653 * allowing it is fairly harmless.
654 */
655 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
656 return(EINVAL);
657
658 /*
659 * Don't allow users to load a valid privileged %cs. Let the
660 * hardware check for invalid selectors, excess privilege in
661 * other selectors, invalid %eip's and invalid %esp's.
662 */
663#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
664 if (!CS_SECURE(context->sc_cs)) {
665 trapsignal(td, SIGBUS, T_PROTFLT);
666 return(EINVAL);
667 }
668
669 PROC_LOCK(p);
670 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
671 SIG_CANTMASK(td->td_sigmask);
672 signotify(td);
673 PROC_UNLOCK(p);
674
675 /*
676 * Restore signal context
677 */
678 /* %gs was restored by the trampoline. */
679 regs->tf_fs = context->sc_fs;
680 regs->tf_es = context->sc_es;
681 regs->tf_ds = context->sc_ds;
682 regs->tf_edi = context->sc_edi;
683 regs->tf_esi = context->sc_esi;
684 regs->tf_ebp = context->sc_ebp;
685 regs->tf_ebx = context->sc_ebx;
686 regs->tf_edx = context->sc_edx;
687 regs->tf_ecx = context->sc_ecx;
688 regs->tf_eax = context->sc_eax;
689 regs->tf_eip = context->sc_eip;
690 regs->tf_cs = context->sc_cs;
691 regs->tf_eflags = eflags;
692 regs->tf_esp = context->sc_esp_at_signal;
693 regs->tf_ss = context->sc_ss;
694
695 /*
696 * call sigaltstack & ignore results..
697 */
698 lss = &uc.uc_stack;
699 ss.ss_sp = lss->ss_sp;
700 ss.ss_size = lss->ss_size;
701 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
702
703#ifdef DEBUG
704 if (ldebug(rt_sigreturn))
705 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
706 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
707#endif
708 (void)kern_sigaltstack(td, &ss, NULL);
709
710 return (EJUSTRETURN);
711}
712
713/*
714 * MPSAFE
715 */
716static void
717linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
718{
719 args[0] = tf->tf_ebx;
720 args[1] = tf->tf_ecx;
721 args[2] = tf->tf_edx;
722 args[3] = tf->tf_esi;
723 args[4] = tf->tf_edi;
724 args[5] = tf->tf_ebp; /* Unconfirmed */
725 *params = NULL; /* no copyin */
726}
727
728
729
730/*
731 * Dump core, into a file named as described in the comments for
732 * expand_name(), unless the process was setuid/setgid.
733 */
734static int
735linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
736{
737 struct proc *p = td->td_proc;
738 struct ucred *cred = td->td_ucred;
739 struct vmspace *vm = p->p_vmspace;
740 char *tempuser;
741 int error;
742
743 if (ctob((uarea_pages + kstack_pages) +
744 vm->vm_dsize + vm->vm_ssize) >= limit)
745 return (EFAULT);
746 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
747 M_WAITOK | M_ZERO);
748 if (tempuser == NULL)
749 return (ENOMEM);
750 PROC_LOCK(p);
751 fill_kinfo_proc(p, &p->p_uarea->u_kproc);
752 PROC_UNLOCK(p);
753 bcopy(p->p_uarea, tempuser, sizeof(struct user));
754 bcopy(td->td_frame,
755 tempuser + ctob(uarea_pages) +
756 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
757 sizeof(struct trapframe));
758 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
759 ctob(uarea_pages + kstack_pages),
760 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
761 (int *)NULL, td);
762 free(tempuser, M_TEMP);
763 if (error == 0)
764 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
765 (int)ctob(vm->vm_dsize),
766 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
767 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
768 if (error == 0)
769 error = vn_rdwr_inchunks(UIO_WRITE, vp,
770 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
771 round_page(ctob(vm->vm_ssize)),
772 (off_t)ctob(uarea_pages + kstack_pages) +
773 ctob(vm->vm_dsize), UIO_USERSPACE,
774 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
775 return (error);
776}
777/*
778 * If a linux binary is exec'ing something, try this image activator
779 * first. We override standard shell script execution in order to
780 * be able to modify the interpreter path. We only do this if a linux
781 * binary is doing the exec, so we do not create an EXEC module for it.
782 */
783static int exec_linux_imgact_try(struct image_params *iparams);
784
785static int
786exec_linux_imgact_try(struct image_params *imgp)
787{
788 const char *head = (const char *)imgp->image_header;
789 int error = -1;
790
791 /*
792 * The interpreter for shell scripts run from a linux binary needs
793 * to be located in /compat/linux if possible in order to recursively
794 * maintain linux path emulation.
795 */
796 if (((const short *)head)[0] == SHELLMAGIC) {
797 /*
798 * Run our normal shell image activator. If it succeeds attempt
799 * to use the alternate path for the interpreter. If an alternate
800 * path is found, use our stringspace to store it.
801 */
802 if ((error = exec_shell_imgact(imgp)) == 0) {
803 char *rpath = NULL;
804
805 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
806 imgp->interpreter_name, &rpath, 0);
807 if (rpath != imgp->interpreter_name) {
808 int len = strlen(rpath) + 1;
809
810 if (len <= MAXSHELLCMDLEN) {
811 memcpy(imgp->interpreter_name, rpath, len);
812 }
813 free(rpath, M_TEMP);
814 }
815 }
816 }
817 return(error);
818}
819
820/*
821 * exec_setregs may initialize some registers differently than Linux
822 * does, thus potentially confusing Linux binaries. If necessary, we
823 * override the exec_setregs default(s) here.
824 */
825static void
826exec_linux_setregs(struct thread *td, u_long entry,
827 u_long stack, u_long ps_strings)
828{
829 struct pcb *pcb = td->td_pcb;
830
831 exec_setregs(td, entry, stack, ps_strings);
832
833 /* Linux sets %gs to 0, we default to _udatasel */
834 pcb->pcb_gs = 0; load_gs(0);
835}
836
837struct sysentvec linux_sysvec = {
838 LINUX_SYS_MAXSYSCALL,
839 linux_sysent,
840 0xff,
841 LINUX_SIGTBLSZ,
842 bsd_to_linux_signal,
843 ELAST + 1,
844 bsd_to_linux_errno,
845 translate_traps,
846 linux_fixup,
847 linux_sendsig,
848 linux_sigcode,
849 &linux_szsigcode,
850 linux_prepsyscall,
851 "Linux a.out",
852 linux_aout_coredump,
853 exec_linux_imgact_try,
854 LINUX_MINSIGSTKSZ,
855 PAGE_SIZE,
856 VM_MIN_ADDRESS,
857 VM_MAXUSER_ADDRESS,
858 USRSTACK,
859 PS_STRINGS,
860 VM_PROT_ALL,
861 exec_copyout_strings,
862 exec_linux_setregs
863};
864
865struct sysentvec elf_linux_sysvec = {
866 LINUX_SYS_MAXSYSCALL,
867 linux_sysent,
868 0xff,
869 LINUX_SIGTBLSZ,
870 bsd_to_linux_signal,
871 ELAST + 1,
872 bsd_to_linux_errno,
873 translate_traps,
874 elf_linux_fixup,
875 linux_sendsig,
876 linux_sigcode,
877 &linux_szsigcode,
878 linux_prepsyscall,
879 "Linux ELF",
880 elf32_coredump,
881 exec_linux_imgact_try,
882 LINUX_MINSIGSTKSZ,
883 PAGE_SIZE,
884 VM_MIN_ADDRESS,
885 VM_MAXUSER_ADDRESS,
886 USRSTACK,
887 PS_STRINGS,
888 VM_PROT_ALL,
889 exec_copyout_strings,
890 exec_linux_setregs
891};
892
893static Elf32_Brandinfo linux_brand = {
894 ELFOSABI_LINUX,
895 EM_386,
896 "Linux",
897 "/compat/linux",
898 "/lib/ld-linux.so.1",
899 &elf_linux_sysvec
900 };
901
902static Elf32_Brandinfo linux_glibc2brand = {
903 ELFOSABI_LINUX,
904 EM_386,
905 "Linux",
906 "/compat/linux",
907 "/lib/ld-linux.so.2",
908 &elf_linux_sysvec
909 };
910
911Elf32_Brandinfo *linux_brandlist[] = {
912 &linux_brand,
913 &linux_glibc2brand,
914 NULL
915 };
916
917static int
918linux_elf_modevent(module_t mod, int type, void *data)
919{
920 Elf32_Brandinfo **brandinfo;
921 int error;
922 struct linux_ioctl_handler **lihp;
923
924 error = 0;
925
926 switch(type) {
927 case MOD_LOAD:
928 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
929 ++brandinfo)
930 if (elf32_insert_brand_entry(*brandinfo) < 0)
931 error = EINVAL;
932 if (error == 0) {
933 SET_FOREACH(lihp, linux_ioctl_handler_set)
934 linux_ioctl_register_handler(*lihp);
935 if (bootverbose)
936 printf("Linux ELF exec handler installed\n");
937 } else
938 printf("cannot insert Linux ELF brand handler\n");
939 break;
940 case MOD_UNLOAD:
941 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
942 ++brandinfo)
943 if (elf32_brand_inuse(*brandinfo))
944 error = EBUSY;
945 if (error == 0) {
946 for (brandinfo = &linux_brandlist[0];
947 *brandinfo != NULL; ++brandinfo)
948 if (elf32_remove_brand_entry(*brandinfo) < 0)
949 error = EINVAL;
950 }
951 if (error == 0) {
952 SET_FOREACH(lihp, linux_ioctl_handler_set)
953 linux_ioctl_unregister_handler(*lihp);
954 if (bootverbose)
955 printf("Linux ELF exec handler removed\n");
956 linux_mib_destroy();
957 } else
958 printf("Could not deinstall ELF interpreter entry\n");
959 break;
960 default:
961 break;
962 }
963 return error;
964}
965
966static moduledata_t linux_elf_mod = {
967 "linuxelf",
968 linux_elf_modevent,
969 0
970};
971
972DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
32/* XXX we use functions that might not exist. */
33#include "opt_compat.h"
34
35#ifndef COMPAT_43
36#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37#endif
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/imgact.h>
42#include <sys/imgact_aout.h>
43#include <sys/imgact_elf.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/mutex.h>
47#include <sys/proc.h>
48#include <sys/signalvar.h>
49#include <sys/syscallsubr.h>
50#include <sys/sysent.h>
51#include <sys/sysproto.h>
52#include <sys/user.h>
53#include <sys/vnode.h>
54
55#include <vm/vm.h>
56#include <vm/vm_param.h>
57#include <vm/vm_page.h>
58#include <vm/vm_extern.h>
59#include <sys/exec.h>
60#include <sys/kernel.h>
61#include <sys/module.h>
62#include <machine/cpu.h>
63#include <machine/md_var.h>
64#include <sys/mutex.h>
65
66#include <vm/vm.h>
67#include <vm/vm_param.h>
68#include <vm/pmap.h>
69#include <vm/vm_map.h>
70#include <vm/vm_object.h>
71
72#include <i386/linux/linux.h>
73#include <i386/linux/linux_proto.h>
74#include <compat/linux/linux_mib.h>
75#include <compat/linux/linux_signal.h>
76#include <compat/linux/linux_util.h>
77
/*
 * Module metadata: declares version 1 of the "linux" module, its
 * dependencies on the sysvmsg, sysvsem and sysvshm modules, and the
 * M_LINUX malloc type.
 */
78MODULE_VERSION(linux, 1);
79MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
80MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
81MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
82
83MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
84
/*
 * The two characters "#!" read as a single 16-bit value; the constant
 * depends on host byte order.  exec_linux_imgact_try() compares it with
 * the first short of the image header.
 */
85#if BYTE_ORDER == LITTLE_ENDIAN
86#define SHELLMAGIC 0x2123 /* #! */
87#else
88#define SHELLMAGIC 0x2321
89#endif
90
91/*
92 * Allow the sendsig functions to use the ldebug() facility
93 * even though they are not syscalls themselves. Map them
94 * to syscall 0. This is slightly less bogus than using
95 * ldebug(sigreturn).
96 */
97#define LINUX_SYS_linux_rt_sendsig 0
98#define LINUX_SYS_linux_sendsig 0
99
/* Signal trampoline code and its size; both are defined outside this file. */
100extern char linux_sigcode[];
101extern int linux_szsigcode;
102
/* Linux system call table, referenced by both sysentvec definitions below. */
103extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
104
/* Linker set of ioctl handlers walked by linux_elf_modevent(). */
105SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
106
/* Prototypes for static functions referenced before their definitions. */
107static int linux_fixup(register_t **stack_base,
108 struct image_params *iparams);
109static int elf_linux_fixup(register_t **stack_base,
110 struct image_params *iparams);
111static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
112 caddr_t *params);
113static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
114 u_long code);
115static void exec_linux_setregs(struct thread *td, u_long entry,
116 u_long stack, u_long ps_strings);
117
118/*
119 * Linux syscalls return negative errno's, we do positive and map them
120 */
/*
 * Indexed by the (positive) BSD errno, 0..ELAST; each entry is the
 * negated Linux errno handed back to the Linux binary.
 */
121static int bsd_to_linux_errno[ELAST + 1] = {
122 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
123 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
124 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
125 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
126 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
127 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
128 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
129 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
130 -6, -6, -43, -42, -75, -6, -84
131};
132
/*
 * BSD -> Linux signal number map.  The sendsig routines index the
 * per-ABI signal table with _SIG_IDX(sig), and both sysentvec
 * definitions below list this array.
 * NOTE(review): the 0 entries presumably mark BSD signals with no Linux
 * counterpart -- confirm against the signal headers.
 */
133int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
134 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
135 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
136 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
137 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
138 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
139 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
140 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
141 0, LINUX_SIGUSR1, LINUX_SIGUSR2
142};
143
/*
 * Linux -> BSD signal number map, the inverse direction of
 * bsd_to_linux_signal.  It is not referenced elsewhere in this part of
 * the file.
 * NOTE(review): presumably indexed by Linux signal number minus one,
 * mirroring the _SIG_IDX() convention -- confirm against the users.
 */
144int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
145 SIGHUP, SIGINT, SIGQUIT, SIGILL,
146 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
147 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
148 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
149 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
150 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
151 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
152 SIGIO, SIGURG, SIGSYS
153};
154
/*
 * Translation of FreeBSD trap codes (array index) to the trap numbers
 * stored in the Linux signal context (sc_trapno).  Codes without a
 * listed translation map to LINUX_T_UNKNOWN.
 */
155#define LINUX_T_UNKNOWN 255
156static int _bsd_to_linux_trapcode[] = {
157 LINUX_T_UNKNOWN, /* 0 */
158 6, /* 1 T_PRIVINFLT */
159 LINUX_T_UNKNOWN, /* 2 */
160 3, /* 3 T_BPTFLT */
161 LINUX_T_UNKNOWN, /* 4 */
162 LINUX_T_UNKNOWN, /* 5 */
163 16, /* 6 T_ARITHTRAP */
164 254, /* 7 T_ASTFLT */
165 LINUX_T_UNKNOWN, /* 8 */
166 13, /* 9 T_PROTFLT */
167 1, /* 10 T_TRCTRAP */
168 LINUX_T_UNKNOWN, /* 11 */
169 14, /* 12 T_PAGEFLT */
170 LINUX_T_UNKNOWN, /* 13 */
171 17, /* 14 T_ALIGNFLT */
172 LINUX_T_UNKNOWN, /* 15 */
173 LINUX_T_UNKNOWN, /* 16 */
174 LINUX_T_UNKNOWN, /* 17 */
175 0, /* 18 T_DIVIDE */
176 2, /* 19 T_NMI */
177 4, /* 20 T_OFLOW */
178 5, /* 21 T_BOUND */
179 7, /* 22 T_DNA */
180 8, /* 23 T_DOUBLEFLT */
181 9, /* 24 T_FPOPFLT */
182 10, /* 25 T_TSSFLT */
183 11, /* 26 T_SEGNPFLT */
184 12, /* 27 T_STKFLT */
185 18, /* 28 T_MCHK */
186 19, /* 29 T_XMMFLT */
187 15 /* 30 T_RESERVED */
188};
/*
 * Bounds-checked lookup: codes past the end of the table yield
 * LINUX_T_UNKNOWN.  Note that `code' is evaluated twice.
 */
189#define bsd_to_linux_trapcode(code) \
190 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
191 _bsd_to_linux_trapcode[(code)]: \
192 LINUX_T_UNKNOWN)
193
194/*
195 * If FreeBSD & Linux have a difference of opinion about what a trap
196 * means, deal with it here.
197 *
198 * MPSAFE
199 */
200static int
201translate_traps(int signal, int trap_code)
202{
203 if (signal != SIGBUS)
204 return signal;
205 switch (trap_code) {
206 case T_PROTFLT:
207 case T_TSSFLT:
208 case T_DOUBLEFLT:
209 case T_PAGEFLT:
210 return SIGSEGV;
211 default:
212 return signal;
213 }
214}
215
216static int
217linux_fixup(register_t **stack_base, struct image_params *imgp)
218{
219 register_t *argv, *envp;
220
221 argv = *stack_base;
222 envp = *stack_base + (imgp->argc + 1);
223 (*stack_base)--;
224 **stack_base = (intptr_t)(void *)envp;
225 (*stack_base)--;
226 **stack_base = (intptr_t)(void *)argv;
227 (*stack_base)--;
228 **stack_base = imgp->argc;
229 return 0;
230}
231
232static int
233elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
234{
235 Elf32_Auxargs *args;
236 register_t *pos;
237
238 KASSERT(curthread->td_proc == imgp->proc &&
239 (curthread->td_proc->p_flag & P_THREADED) == 0,
240 ("unsafe elf_linux_fixup(), should be curproc"));
241 args = (Elf32_Auxargs *)imgp->auxargs;
242 pos = *stack_base + (imgp->argc + imgp->envc + 2);
243
244 if (args->trace)
245 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
246 if (args->execfd != -1)
247 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
248 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
249 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
250 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
251 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
252 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
253 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
254 AUXARGS_ENTRY(pos, AT_BASE, args->base);
255 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
256 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
257 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
258 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
259 AUXARGS_ENTRY(pos, AT_NULL, 0);
260
261 free(imgp->auxargs, M_TEMP);
262 imgp->auxargs = NULL;
263
264 (*stack_base)--;
265 **stack_base = (register_t)imgp->argc;
266 return 0;
267}
268
/*
 * Segment selectors loaded into the trapframe when a signal handler is
 * entered, and the offset added to the sigcode start to obtain the
 * handler entry point used by linux_rt_sendsig().
 */
269extern int _ucodesel, _udatasel;
270extern unsigned long linux_sznonrtsigcode;
271
272static void
273linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
274{
275 struct thread *td = curthread;
276 struct proc *p = td->td_proc;
277 struct sigacts *psp;
278 struct trapframe *regs;
279 struct l_rt_sigframe *fp, frame;
280 int oonstack;
281
282 PROC_LOCK_ASSERT(p, MA_OWNED);
283 psp = p->p_sigacts;
284 mtx_assert(&psp->ps_mtx, MA_OWNED);
285 regs = td->td_frame;
286 oonstack = sigonstack(regs->tf_esp);
287
288#ifdef DEBUG
289 if (ldebug(rt_sendsig))
290 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
291 catcher, sig, (void*)mask, code);
292#endif
293 /*
294 * Allocate space for the signal handler context.
295 */
296 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
297 SIGISMEMBER(psp->ps_sigonstack, sig)) {
298 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
299 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
300 } else
301 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
302 mtx_unlock(&psp->ps_mtx);
303
304 /*
305 * Build the argument list for the signal handler.
306 */
307 if (p->p_sysent->sv_sigtbl)
308 if (sig <= p->p_sysent->sv_sigsize)
309 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
310
311 bzero(&frame, sizeof(frame));
312
313 frame.sf_handler = catcher;
314 frame.sf_sig = sig;
315 frame.sf_siginfo = &fp->sf_si;
316 frame.sf_ucontext = &fp->sf_sc;
317
318 /* Fill in POSIX parts */
319 frame.sf_si.lsi_signo = sig;
320 frame.sf_si.lsi_code = code;
321 frame.sf_si.lsi_addr = (void *)regs->tf_err;
322
323 /*
324 * Build the signal context to be used by sigreturn.
325 */
326 frame.sf_sc.uc_flags = 0; /* XXX ??? */
327 frame.sf_sc.uc_link = NULL; /* XXX ??? */
328
329 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
330 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
331 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
332 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
333 PROC_UNLOCK(p);
334
335 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
336
337 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
338 frame.sf_sc.uc_mcontext.sc_gs = rgs();
339 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
340 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
341 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
342 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
343 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
344 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
345 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
346 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
347 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
348 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
349 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
350 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
351 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
352 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
353 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
354 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
355 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
356
357#ifdef DEBUG
358 if (ldebug(rt_sendsig))
359 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
360 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
361 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
362#endif
363
364 if (copyout(&frame, fp, sizeof(frame)) != 0) {
365 /*
366 * Process has trashed its stack; give it an illegal
367 * instruction to halt it in its tracks.
368 */
369#ifdef DEBUG
370 if (ldebug(rt_sendsig))
371 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
372 fp, oonstack);
373#endif
374 PROC_LOCK(p);
375 sigexit(td, SIGILL);
376 }
377
378 /*
379 * Build context to run handler in.
380 */
381 regs->tf_esp = (int)fp;
382 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
383 linux_sznonrtsigcode;
384 regs->tf_eflags &= ~(PSL_T | PSL_VM);
385 regs->tf_cs = _ucodesel;
386 regs->tf_ds = _udatasel;
387 regs->tf_es = _udatasel;
388 regs->tf_fs = _udatasel;
389 regs->tf_ss = _udatasel;
390 PROC_LOCK(p);
391 mtx_lock(&psp->ps_mtx);
392}
393
394
395/*
396 * Send an interrupt to process.
397 *
398 * Stack is set up to allow sigcode stored
399 * in u. to call routine, followed by kcall
400 * to sigreturn routine below. After sigreturn
401 * resets the signal mask, the stack, and the
402 * frame pointer, it returns to the user
403 * specified pc, psl.
404 */
405static void
406linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
407{
408 struct thread *td = curthread;
409 struct proc *p = td->td_proc;
410 struct sigacts *psp;
411 struct trapframe *regs;
412 struct l_sigframe *fp, frame;
413 l_sigset_t lmask;
414 int oonstack, i;
415
416 PROC_LOCK_ASSERT(p, MA_OWNED);
417 psp = p->p_sigacts;
418 mtx_assert(&psp->ps_mtx, MA_OWNED);
419 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
420 /* Signal handler installed with SA_SIGINFO. */
421 linux_rt_sendsig(catcher, sig, mask, code);
422 return;
423 }
424
425 regs = td->td_frame;
426 oonstack = sigonstack(regs->tf_esp);
427
428#ifdef DEBUG
429 if (ldebug(sendsig))
430 printf(ARGS(sendsig, "%p, %d, %p, %lu"),
431 catcher, sig, (void*)mask, code);
432#endif
433
434 /*
435 * Allocate space for the signal handler context.
436 */
437 if ((p->p_flag & P_ALTSTACK) && !oonstack &&
438 SIGISMEMBER(psp->ps_sigonstack, sig)) {
439 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
440 p->p_sigstk.ss_size - sizeof(struct l_sigframe));
441 } else
442 fp = (struct l_sigframe *)regs->tf_esp - 1;
443 mtx_unlock(&psp->ps_mtx);
444 PROC_UNLOCK(p);
445
446 /*
447 * Build the argument list for the signal handler.
448 */
449 if (p->p_sysent->sv_sigtbl)
450 if (sig <= p->p_sysent->sv_sigsize)
451 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
452
453 bzero(&frame, sizeof(frame));
454
455 frame.sf_handler = catcher;
456 frame.sf_sig = sig;
457
458 bsd_to_linux_sigset(mask, &lmask);
459
460 /*
461 * Build the signal context to be used by sigreturn.
462 */
463 frame.sf_sc.sc_mask = lmask.__bits[0];
464 frame.sf_sc.sc_gs = rgs();
465 frame.sf_sc.sc_fs = regs->tf_fs;
466 frame.sf_sc.sc_es = regs->tf_es;
467 frame.sf_sc.sc_ds = regs->tf_ds;
468 frame.sf_sc.sc_edi = regs->tf_edi;
469 frame.sf_sc.sc_esi = regs->tf_esi;
470 frame.sf_sc.sc_ebp = regs->tf_ebp;
471 frame.sf_sc.sc_ebx = regs->tf_ebx;
472 frame.sf_sc.sc_edx = regs->tf_edx;
473 frame.sf_sc.sc_ecx = regs->tf_ecx;
474 frame.sf_sc.sc_eax = regs->tf_eax;
475 frame.sf_sc.sc_eip = regs->tf_eip;
476 frame.sf_sc.sc_cs = regs->tf_cs;
477 frame.sf_sc.sc_eflags = regs->tf_eflags;
478 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
479 frame.sf_sc.sc_ss = regs->tf_ss;
480 frame.sf_sc.sc_err = regs->tf_err;
481 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
482
483 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
484 frame.sf_extramask[i] = lmask.__bits[i+1];
485
486 if (copyout(&frame, fp, sizeof(frame)) != 0) {
487 /*
488 * Process has trashed its stack; give it an illegal
489 * instruction to halt it in its tracks.
490 */
491 PROC_LOCK(p);
492 sigexit(td, SIGILL);
493 }
494
495 /*
496 * Build context to run handler in.
497 */
498 regs->tf_esp = (int)fp;
499 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
500 regs->tf_eflags &= ~(PSL_T | PSL_VM);
501 regs->tf_cs = _ucodesel;
502 regs->tf_ds = _udatasel;
503 regs->tf_es = _udatasel;
504 regs->tf_fs = _udatasel;
505 regs->tf_ss = _udatasel;
506 PROC_LOCK(p);
507 mtx_lock(&psp->ps_mtx);
508}
509
510/*
511 * System call to cleanup state after a signal
512 * has been taken. Reset signal mask and
513 * stack state from context left by sendsig (above).
514 * Return to previous pc and psl as specified by
515 * context left by sendsig. Check carefully to
516 * make sure that the user has not modified the
517 * psl to gain improper privileges or to cause
518 * a machine fault.
519 */
520int
521linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
522{
523 struct proc *p = td->td_proc;
524 struct l_sigframe frame;
525 struct trapframe *regs;
526 l_sigset_t lmask;
527 int eflags, i;
528
529 regs = td->td_frame;
530
531#ifdef DEBUG
532 if (ldebug(sigreturn))
533 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
534#endif
535 /*
536 * The trampoline code hands us the sigframe.
537 * It is unsafe to keep track of it ourselves, in the event that a
538 * program jumps out of a signal handler.
539 */
540 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
541 return (EFAULT);
542
543 /*
544 * Check for security violations.
545 */
546#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
547 eflags = frame.sf_sc.sc_eflags;
548 /*
549 * XXX do allow users to change the privileged flag PSL_RF. The
550 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
551 * sometimes set it there too. tf_eflags is kept in the signal
552 * context during signal handling and there is no other place
553 * to remember it, so the PSL_RF bit may be corrupted by the
554 * signal handler without us knowing. Corruption of the PSL_RF
555 * bit at worst causes one more or one less debugger trap, so
556 * allowing it is fairly harmless.
557 */
558 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
559 return(EINVAL);
560
561 /*
562 * Don't allow users to load a valid privileged %cs. Let the
563 * hardware check for invalid selectors, excess privilege in
564 * other selectors, invalid %eip's and invalid %esp's.
565 */
566#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
567 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
568 trapsignal(td, SIGBUS, T_PROTFLT);
569 return(EINVAL);
570 }
571
572 lmask.__bits[0] = frame.sf_sc.sc_mask;
573 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
574 lmask.__bits[i+1] = frame.sf_extramask[i];
575 PROC_LOCK(p);
576 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
577 SIG_CANTMASK(td->td_sigmask);
578 signotify(td);
579 PROC_UNLOCK(p);
580
581 /*
582 * Restore signal context.
583 */
584 /* %gs was restored by the trampoline. */
585 regs->tf_fs = frame.sf_sc.sc_fs;
586 regs->tf_es = frame.sf_sc.sc_es;
587 regs->tf_ds = frame.sf_sc.sc_ds;
588 regs->tf_edi = frame.sf_sc.sc_edi;
589 regs->tf_esi = frame.sf_sc.sc_esi;
590 regs->tf_ebp = frame.sf_sc.sc_ebp;
591 regs->tf_ebx = frame.sf_sc.sc_ebx;
592 regs->tf_edx = frame.sf_sc.sc_edx;
593 regs->tf_ecx = frame.sf_sc.sc_ecx;
594 regs->tf_eax = frame.sf_sc.sc_eax;
595 regs->tf_eip = frame.sf_sc.sc_eip;
596 regs->tf_cs = frame.sf_sc.sc_cs;
597 regs->tf_eflags = eflags;
598 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
599 regs->tf_ss = frame.sf_sc.sc_ss;
600
601 return (EJUSTRETURN);
602}
603
604/*
605 * System call to cleanup state after a signal
606 * has been taken. Reset signal mask and
607 * stack state from context left by rt_sendsig (above).
608 * Return to previous pc and psl as specified by
609 * context left by sendsig. Check carefully to
610 * make sure that the user has not modified the
611 * psl to gain improper privileges or to cause
612 * a machine fault.
613 */
614int
615linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
616{
617 struct proc *p = td->td_proc;
618 struct l_ucontext uc;
619 struct l_sigcontext *context;
620 l_stack_t *lss;
621 stack_t ss;
622 struct trapframe *regs;
623 int eflags;
624
625 regs = td->td_frame;
626
627#ifdef DEBUG
628 if (ldebug(rt_sigreturn))
629 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
630#endif
631 /*
632 * The trampoline code hands us the ucontext.
633 * It is unsafe to keep track of it ourselves, in the event that a
634 * program jumps out of a signal handler.
635 */
636 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
637 return (EFAULT);
638
639 context = &uc.uc_mcontext;
640
641 /*
642 * Check for security violations.
643 */
644#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
645 eflags = context->sc_eflags;
646 /*
647 * XXX do allow users to change the privileged flag PSL_RF. The
648 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
649 * sometimes set it there too. tf_eflags is kept in the signal
650 * context during signal handling and there is no other place
651 * to remember it, so the PSL_RF bit may be corrupted by the
652 * signal handler without us knowing. Corruption of the PSL_RF
653 * bit at worst causes one more or one less debugger trap, so
654 * allowing it is fairly harmless.
655 */
656 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
657 return(EINVAL);
658
659 /*
660 * Don't allow users to load a valid privileged %cs. Let the
661 * hardware check for invalid selectors, excess privilege in
662 * other selectors, invalid %eip's and invalid %esp's.
663 */
664#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
665 if (!CS_SECURE(context->sc_cs)) {
666 trapsignal(td, SIGBUS, T_PROTFLT);
667 return(EINVAL);
668 }
669
670 PROC_LOCK(p);
671 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
672 SIG_CANTMASK(td->td_sigmask);
673 signotify(td);
674 PROC_UNLOCK(p);
675
676 /*
677 * Restore signal context
678 */
679 /* %gs was restored by the trampoline. */
680 regs->tf_fs = context->sc_fs;
681 regs->tf_es = context->sc_es;
682 regs->tf_ds = context->sc_ds;
683 regs->tf_edi = context->sc_edi;
684 regs->tf_esi = context->sc_esi;
685 regs->tf_ebp = context->sc_ebp;
686 regs->tf_ebx = context->sc_ebx;
687 regs->tf_edx = context->sc_edx;
688 regs->tf_ecx = context->sc_ecx;
689 regs->tf_eax = context->sc_eax;
690 regs->tf_eip = context->sc_eip;
691 regs->tf_cs = context->sc_cs;
692 regs->tf_eflags = eflags;
693 regs->tf_esp = context->sc_esp_at_signal;
694 regs->tf_ss = context->sc_ss;
695
696 /*
697 * call sigaltstack & ignore results..
698 */
699 lss = &uc.uc_stack;
700 ss.ss_sp = lss->ss_sp;
701 ss.ss_size = lss->ss_size;
702 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
703
704#ifdef DEBUG
705 if (ldebug(rt_sigreturn))
706 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
707 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
708#endif
709 (void)kern_sigaltstack(td, &ss, NULL);
710
711 return (EJUSTRETURN);
712}
713
714/*
715 * MPSAFE
716 */
717static void
718linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
719{
720 args[0] = tf->tf_ebx;
721 args[1] = tf->tf_ecx;
722 args[2] = tf->tf_edx;
723 args[3] = tf->tf_esi;
724 args[4] = tf->tf_edi;
725 args[5] = tf->tf_ebp; /* Unconfirmed */
726 *params = NULL; /* no copyin */
727}
728
729
730
731/*
732 * Dump core, into a file named as described in the comments for
733 * expand_name(), unless the process was setuid/setgid.
734 */
735static int
736linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
737{
738 struct proc *p = td->td_proc;
739 struct ucred *cred = td->td_ucred;
740 struct vmspace *vm = p->p_vmspace;
741 char *tempuser;
742 int error;
743
744 if (ctob((uarea_pages + kstack_pages) +
745 vm->vm_dsize + vm->vm_ssize) >= limit)
746 return (EFAULT);
747 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
748 M_WAITOK | M_ZERO);
749 if (tempuser == NULL)
750 return (ENOMEM);
751 PROC_LOCK(p);
752 fill_kinfo_proc(p, &p->p_uarea->u_kproc);
753 PROC_UNLOCK(p);
754 bcopy(p->p_uarea, tempuser, sizeof(struct user));
755 bcopy(td->td_frame,
756 tempuser + ctob(uarea_pages) +
757 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
758 sizeof(struct trapframe));
759 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
760 ctob(uarea_pages + kstack_pages),
761 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
762 (int *)NULL, td);
763 free(tempuser, M_TEMP);
764 if (error == 0)
765 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
766 (int)ctob(vm->vm_dsize),
767 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
768 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
769 if (error == 0)
770 error = vn_rdwr_inchunks(UIO_WRITE, vp,
771 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
772 round_page(ctob(vm->vm_ssize)),
773 (off_t)ctob(uarea_pages + kstack_pages) +
774 ctob(vm->vm_dsize), UIO_USERSPACE,
775 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
776 return (error);
777}
778/*
779 * If a linux binary is exec'ing something, try this image activator
780 * first. We override standard shell script execution in order to
781 * be able to modify the interpreter path. We only do this if a linux
782 * binary is doing the exec, so we do not create an EXEC module for it.
783 */
784static int exec_linux_imgact_try(struct image_params *iparams);
785
786static int
787exec_linux_imgact_try(struct image_params *imgp)
788{
789 const char *head = (const char *)imgp->image_header;
790 int error = -1;
791
792 /*
793 * The interpreter for shell scripts run from a linux binary needs
794 * to be located in /compat/linux if possible in order to recursively
795 * maintain linux path emulation.
796 */
797 if (((const short *)head)[0] == SHELLMAGIC) {
798 /*
799 * Run our normal shell image activator. If it succeeds attempt
800 * to use the alternate path for the interpreter. If an alternate
801 * path is found, use our stringspace to store it.
802 */
803 if ((error = exec_shell_imgact(imgp)) == 0) {
804 char *rpath = NULL;
805
806 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
807 imgp->interpreter_name, &rpath, 0);
808 if (rpath != imgp->interpreter_name) {
809 int len = strlen(rpath) + 1;
810
811 if (len <= MAXSHELLCMDLEN) {
812 memcpy(imgp->interpreter_name, rpath, len);
813 }
814 free(rpath, M_TEMP);
815 }
816 }
817 }
818 return(error);
819}
820
821/*
822 * exec_setregs may initialize some registers differently than Linux
823 * does, thus potentially confusing Linux binaries. If necessary, we
824 * override the exec_setregs default(s) here.
825 */
826static void
827exec_linux_setregs(struct thread *td, u_long entry,
828 u_long stack, u_long ps_strings)
829{
830 struct pcb *pcb = td->td_pcb;
831
832 exec_setregs(td, entry, stack, ps_strings);
833
834 /* Linux sets %gs to 0, we default to _udatasel */
835 pcb->pcb_gs = 0; load_gs(0);
836}
837
/*
 * System entry vector for Linux a.out binaries.  It differs from
 * elf_linux_sysvec only in the fixup routine (linux_fixup), the name
 * string and the core dump routine (linux_aout_coredump).
 * NOTE(review): the initializer is positional; confirm the field order
 * against struct sysentvec before adding or reordering entries.
 */
838struct sysentvec linux_sysvec = {
839 LINUX_SYS_MAXSYSCALL,
840 linux_sysent,
841 0xff,
842 LINUX_SIGTBLSZ,
843 bsd_to_linux_signal,
844 ELAST + 1,
845 bsd_to_linux_errno,
846 translate_traps,
847 linux_fixup,
848 linux_sendsig,
849 linux_sigcode,
850 &linux_szsigcode,
851 linux_prepsyscall,
852 "Linux a.out",
853 linux_aout_coredump,
854 exec_linux_imgact_try,
855 LINUX_MINSIGSTKSZ,
856 PAGE_SIZE,
857 VM_MIN_ADDRESS,
858 VM_MAXUSER_ADDRESS,
859 USRSTACK,
860 PS_STRINGS,
861 VM_PROT_ALL,
862 exec_copyout_strings,
863 exec_linux_setregs
864};
865
/*
 * System entry vector for Linux ELF binaries; both ELF brand entries
 * below point at it.  It differs from linux_sysvec only in the fixup
 * routine (elf_linux_fixup), the name string and the core dump routine
 * (elf32_coredump).
 * NOTE(review): the initializer is positional; confirm the field order
 * against struct sysentvec before adding or reordering entries.
 */
866struct sysentvec elf_linux_sysvec = {
867 LINUX_SYS_MAXSYSCALL,
868 linux_sysent,
869 0xff,
870 LINUX_SIGTBLSZ,
871 bsd_to_linux_signal,
872 ELAST + 1,
873 bsd_to_linux_errno,
874 translate_traps,
875 elf_linux_fixup,
876 linux_sendsig,
877 linux_sigcode,
878 &linux_szsigcode,
879 linux_prepsyscall,
880 "Linux ELF",
881 elf32_coredump,
882 exec_linux_imgact_try,
883 LINUX_MINSIGSTKSZ,
884 PAGE_SIZE,
885 VM_MIN_ADDRESS,
886 VM_MAXUSER_ADDRESS,
887 USRSTACK,
888 PS_STRINGS,
889 VM_PROT_ALL,
890 exec_copyout_strings,
891 exec_linux_setregs
892};
893
/*
 * ELF brand for Linux binaries that use /lib/ld-linux.so.1 as their
 * interpreter.
 */
894static Elf32_Brandinfo linux_brand = {
895 ELFOSABI_LINUX,
896 EM_386,
897 "Linux",
898 "/compat/linux",
899 "/lib/ld-linux.so.1",
900 &elf_linux_sysvec
901 };
902
/*
 * Same brand for binaries that use /lib/ld-linux.so.2; only the
 * interpreter path differs from linux_brand.
 */
903static Elf32_Brandinfo linux_glibc2brand = {
904 ELFOSABI_LINUX,
905 EM_386,
906 "Linux",
907 "/compat/linux",
908 "/lib/ld-linux.so.2",
909 &elf_linux_sysvec
910 };
911
/*
 * NULL-terminated list of the brands that linux_elf_modevent() inserts
 * on load and removes on unload.
 */
912Elf32_Brandinfo *linux_brandlist[] = {
913 &linux_brand,
914 &linux_glibc2brand,
915 NULL
916 };
917
918static int
919linux_elf_modevent(module_t mod, int type, void *data)
920{
921 Elf32_Brandinfo **brandinfo;
922 int error;
923 struct linux_ioctl_handler **lihp;
924
925 error = 0;
926
927 switch(type) {
928 case MOD_LOAD:
929 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
930 ++brandinfo)
931 if (elf32_insert_brand_entry(*brandinfo) < 0)
932 error = EINVAL;
933 if (error == 0) {
934 SET_FOREACH(lihp, linux_ioctl_handler_set)
935 linux_ioctl_register_handler(*lihp);
936 if (bootverbose)
937 printf("Linux ELF exec handler installed\n");
938 } else
939 printf("cannot insert Linux ELF brand handler\n");
940 break;
941 case MOD_UNLOAD:
942 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
943 ++brandinfo)
944 if (elf32_brand_inuse(*brandinfo))
945 error = EBUSY;
946 if (error == 0) {
947 for (brandinfo = &linux_brandlist[0];
948 *brandinfo != NULL; ++brandinfo)
949 if (elf32_remove_brand_entry(*brandinfo) < 0)
950 error = EINVAL;
951 }
952 if (error == 0) {
953 SET_FOREACH(lihp, linux_ioctl_handler_set)
954 linux_ioctl_unregister_handler(*lihp);
955 if (bootverbose)
956 printf("Linux ELF exec handler removed\n");
957 linux_mib_destroy();
958 } else
959 printf("Could not deinstall ELF interpreter entry\n");
960 break;
961 default:
962 break;
963 }
964 return error;
965}
966
967static moduledata_t linux_elf_mod = {
968 "linuxelf",
969 linux_elf_modevent,
970 0
971};
972
973DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);