Deleted Added
sdiff udiff text old ( 177997 ) new ( 183322 )
full compact
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 183322 2008-09-24 10:14:37Z kib $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/exec.h>
35#include <sys/fcntl.h>
36#include <sys/imgact.h>
37#include <sys/imgact_aout.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/proc.h>
45#include <sys/signalvar.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysent.h>
48#include <sys/sysproto.h>
49#include <sys/vnode.h>
50#include <sys/eventhandler.h>
51
52#include <vm/vm.h>
53#include <vm/pmap.h>
54#include <vm/vm_extern.h>
55#include <vm/vm_map.h>
56#include <vm/vm_object.h>
57#include <vm/vm_page.h>
58#include <vm/vm_param.h>
59
60#include <machine/cpu.h>
61#include <machine/md_var.h>
62#include <machine/pcb.h>
63
64#include <i386/linux/linux.h>
65#include <i386/linux/linux_proto.h>
66#include <compat/linux/linux_emul.h>
67#include <compat/linux/linux_mib.h>
68#include <compat/linux/linux_signal.h>
69#include <compat/linux/linux_util.h>
70
71MODULE_VERSION(linux, 1);
72
73MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
74
75#if BYTE_ORDER == LITTLE_ENDIAN
76#define SHELLMAGIC 0x2123 /* #! */
77#else
78#define SHELLMAGIC 0x2321
79#endif
80
81/*
82 * Allow the sendsig functions to use the ldebug() facility
83 * even though they are not syscalls themselves. Map them
84 * to syscall 0. This is slightly less bogus than using
85 * ldebug(sigreturn).
86 */
87#define LINUX_SYS_linux_rt_sendsig 0
88#define LINUX_SYS_linux_sendsig 0
89
90#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr)))
91#define __LINUX_NPXCW__ 0x37f
92
93extern char linux_sigcode[];
94extern int linux_szsigcode;
95
96extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97
98SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100
101static int linux_fixup(register_t **stack_base,
102 struct image_params *iparams);
103static int elf_linux_fixup(register_t **stack_base,
104 struct image_params *iparams);
105static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
106 caddr_t *params);
107static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108static void exec_linux_setregs(struct thread *td, u_long entry,
109 u_long stack, u_long ps_strings);
110
111extern LIST_HEAD(futex_list, futex) futex_list;
112extern struct sx futex_sx;
113
114static eventhandler_tag linux_exit_tag;
115static eventhandler_tag linux_schedtail_tag;
116static eventhandler_tag linux_exec_tag;
117
118/*
119 * Linux syscalls return negative errno's, we do positive and map them
120 * Reference:
121 * FreeBSD: src/sys/sys/errno.h
122 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
123 * linux-2.6.17.8/include/asm-generic/errno.h
124 */
125static int bsd_to_linux_errno[ELAST + 1] = {
126 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
127 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
128 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
129 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
130 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
131 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
132 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
133 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
134 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
135 -72, -67, -71
136};
137
138int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
139 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
140 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
141 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
142 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
143 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
144 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
145 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
146 0, LINUX_SIGUSR1, LINUX_SIGUSR2
147};
148
149int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
150 SIGHUP, SIGINT, SIGQUIT, SIGILL,
151 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
152 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
153 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
154 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
155 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
156 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
157 SIGIO, SIGURG, SIGSYS
158};
159
/* Value reported for any FreeBSD trap number without a Linux equivalent. */
#define	LINUX_T_UNKNOWN	255

/*
 * FreeBSD trap number -> Linux trap number.  The array index is the FreeBSD
 * trap number; do not reorder the entries.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0 */
	6,			/*  1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2 */
	3,			/*  3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4 */
	LINUX_T_UNKNOWN,	/*  5 */
	16,			/*  6  T_ARITHTRAP */
	254,			/*  7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8 */
	13,			/*  9  T_PROTFLT */
	1,			/* 10  T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12  T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14  T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18  T_DIVIDE */
	2,			/* 19  T_NMI */
	4,			/* 20  T_OFLOW */
	5,			/* 21  T_BOUND */
	7,			/* 22  T_DNA */
	8,			/* 23  T_DOUBLEFLT */
	9,			/* 24  T_FPOPFLT */
	10,			/* 25  T_TSSFLT */
	11,			/* 26  T_SEGNPFLT */
	12,			/* 27  T_STKFLT */
	18,			/* 28  T_MCHK */
	19,			/* 29  T_XMMFLT */
	15			/* 30  T_RESERVED */
};

/*
 * Bounds-checked lookup: anything past the end of the table maps to
 * LINUX_T_UNKNOWN.  The argument is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(*_bsd_to_linux_trapcode) ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
198
199/*
200 * If FreeBSD & Linux have a difference of opinion about what a trap
201 * means, deal with it here.
202 *
203 * MPSAFE
204 */
205static int
206translate_traps(int signal, int trap_code)
207{
208 if (signal != SIGBUS)
209 return signal;
210 switch (trap_code) {
211 case T_PROTFLT:
212 case T_TSSFLT:
213 case T_DOUBLEFLT:
214 case T_PAGEFLT:
215 return SIGSEGV;
216 default:
217 return signal;
218 }
219}
220
221static int
222linux_fixup(register_t **stack_base, struct image_params *imgp)
223{
224 register_t *argv, *envp;
225
226 argv = *stack_base;
227 envp = *stack_base + (imgp->args->argc + 1);
228 (*stack_base)--;
229 **stack_base = (intptr_t)(void *)envp;
230 (*stack_base)--;
231 **stack_base = (intptr_t)(void *)argv;
232 (*stack_base)--;
233 **stack_base = imgp->args->argc;
234 return 0;
235}
236
237static int
238elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
239{
240 Elf32_Auxargs *args;
241 register_t *pos;
242
243 KASSERT(curthread->td_proc == imgp->proc,
244 ("unsafe elf_linux_fixup(), should be curproc"));
245 args = (Elf32_Auxargs *)imgp->auxargs;
246 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
247
248 if (args->trace)
249 AUXARGS_ENTRY(pos, AT_DEBUG, 1);
250 if (args->execfd != -1)
251 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
252 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
253 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
254 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
255 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
256 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
257 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
258 AUXARGS_ENTRY(pos, AT_BASE, args->base);
259 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
260 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
261 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
262 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
263 AUXARGS_ENTRY(pos, AT_NULL, 0);
264
265 free(imgp->auxargs, M_TEMP);
266 imgp->auxargs = NULL;
267
268 (*stack_base)--;
269 **stack_base = (register_t)imgp->args->argc;
270 return 0;
271}
272
273extern int _ucodesel, _udatasel;
274extern unsigned long linux_sznonrtsigcode;
275
276static void
277linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
278{
279 struct thread *td = curthread;
280 struct proc *p = td->td_proc;
281 struct sigacts *psp;
282 struct trapframe *regs;
283 struct l_rt_sigframe *fp, frame;
284 int sig, code;
285 int oonstack;
286
287 sig = ksi->ksi_signo;
288 code = ksi->ksi_code;
289 PROC_LOCK_ASSERT(p, MA_OWNED);
290 psp = p->p_sigacts;
291 mtx_assert(&psp->ps_mtx, MA_OWNED);
292 regs = td->td_frame;
293 oonstack = sigonstack(regs->tf_esp);
294
295#ifdef DEBUG
296 if (ldebug(rt_sendsig))
297 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
298 catcher, sig, (void*)mask, code);
299#endif
300 /*
301 * Allocate space for the signal handler context.
302 */
303 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
304 SIGISMEMBER(psp->ps_sigonstack, sig)) {
305 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
306 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
307 } else
308 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
309 mtx_unlock(&psp->ps_mtx);
310
311 /*
312 * Build the argument list for the signal handler.
313 */
314 if (p->p_sysent->sv_sigtbl)
315 if (sig <= p->p_sysent->sv_sigsize)
316 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
317
318 bzero(&frame, sizeof(frame));
319
320 frame.sf_handler = catcher;
321 frame.sf_sig = sig;
322 frame.sf_siginfo = &fp->sf_si;
323 frame.sf_ucontext = &fp->sf_sc;
324
325 /* Fill in POSIX parts */
326 frame.sf_si.lsi_signo = sig;
327 frame.sf_si.lsi_code = code;
328 frame.sf_si.lsi_addr = ksi->ksi_addr;
329
330 /*
331 * Build the signal context to be used by sigreturn.
332 */
333 frame.sf_sc.uc_flags = 0; /* XXX ??? */
334 frame.sf_sc.uc_link = NULL; /* XXX ??? */
335
336 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
337 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
338 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
339 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
340 PROC_UNLOCK(p);
341
342 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
343
344 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
345 frame.sf_sc.uc_mcontext.sc_gs = rgs();
346 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
347 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
348 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
349 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
350 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
351 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
352 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
353 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
354 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
355 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
356 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
357 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
358 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
359 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
360 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
361 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
362 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
363 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
364
365#ifdef DEBUG
366 if (ldebug(rt_sendsig))
367 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
368 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
369 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
370#endif
371
372 if (copyout(&frame, fp, sizeof(frame)) != 0) {
373 /*
374 * Process has trashed its stack; give it an illegal
375 * instruction to halt it in its tracks.
376 */
377#ifdef DEBUG
378 if (ldebug(rt_sendsig))
379 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
380 fp, oonstack);
381#endif
382 PROC_LOCK(p);
383 sigexit(td, SIGILL);
384 }
385
386 /*
387 * Build context to run handler in.
388 */
389 regs->tf_esp = (int)fp;
390 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
391 linux_sznonrtsigcode;
392 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
393 regs->tf_cs = _ucodesel;
394 regs->tf_ds = _udatasel;
395 regs->tf_es = _udatasel;
396 regs->tf_fs = _udatasel;
397 regs->tf_ss = _udatasel;
398 PROC_LOCK(p);
399 mtx_lock(&psp->ps_mtx);
400}
401
402
403/*
404 * Send an interrupt to process.
405 *
406 * Stack is set up to allow sigcode stored
407 * in u. to call routine, followed by kcall
408 * to sigreturn routine below. After sigreturn
409 * resets the signal mask, the stack, and the
410 * frame pointer, it returns to the user
411 * specified pc, psl.
412 */
413static void
414linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
415{
416 struct thread *td = curthread;
417 struct proc *p = td->td_proc;
418 struct sigacts *psp;
419 struct trapframe *regs;
420 struct l_sigframe *fp, frame;
421 l_sigset_t lmask;
422 int sig, code;
423 int oonstack, i;
424
425 PROC_LOCK_ASSERT(p, MA_OWNED);
426 psp = p->p_sigacts;
427 sig = ksi->ksi_signo;
428 code = ksi->ksi_code;
429 mtx_assert(&psp->ps_mtx, MA_OWNED);
430 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
431 /* Signal handler installed with SA_SIGINFO. */
432 linux_rt_sendsig(catcher, ksi, mask);
433 return;
434 }
435 regs = td->td_frame;
436 oonstack = sigonstack(regs->tf_esp);
437
438#ifdef DEBUG
439 if (ldebug(sendsig))
440 printf(ARGS(sendsig, "%p, %d, %p, %u"),
441 catcher, sig, (void*)mask, code);
442#endif
443
444 /*
445 * Allocate space for the signal handler context.
446 */
447 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
448 SIGISMEMBER(psp->ps_sigonstack, sig)) {
449 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
450 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
451 } else
452 fp = (struct l_sigframe *)regs->tf_esp - 1;
453 mtx_unlock(&psp->ps_mtx);
454 PROC_UNLOCK(p);
455
456 /*
457 * Build the argument list for the signal handler.
458 */
459 if (p->p_sysent->sv_sigtbl)
460 if (sig <= p->p_sysent->sv_sigsize)
461 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
462
463 bzero(&frame, sizeof(frame));
464
465 frame.sf_handler = catcher;
466 frame.sf_sig = sig;
467
468 bsd_to_linux_sigset(mask, &lmask);
469
470 /*
471 * Build the signal context to be used by sigreturn.
472 */
473 frame.sf_sc.sc_mask = lmask.__bits[0];
474 frame.sf_sc.sc_gs = rgs();
475 frame.sf_sc.sc_fs = regs->tf_fs;
476 frame.sf_sc.sc_es = regs->tf_es;
477 frame.sf_sc.sc_ds = regs->tf_ds;
478 frame.sf_sc.sc_edi = regs->tf_edi;
479 frame.sf_sc.sc_esi = regs->tf_esi;
480 frame.sf_sc.sc_ebp = regs->tf_ebp;
481 frame.sf_sc.sc_ebx = regs->tf_ebx;
482 frame.sf_sc.sc_edx = regs->tf_edx;
483 frame.sf_sc.sc_ecx = regs->tf_ecx;
484 frame.sf_sc.sc_eax = regs->tf_eax;
485 frame.sf_sc.sc_eip = regs->tf_eip;
486 frame.sf_sc.sc_cs = regs->tf_cs;
487 frame.sf_sc.sc_eflags = regs->tf_eflags;
488 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
489 frame.sf_sc.sc_ss = regs->tf_ss;
490 frame.sf_sc.sc_err = regs->tf_err;
491 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
492 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
493
494 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
495 frame.sf_extramask[i] = lmask.__bits[i+1];
496
497 if (copyout(&frame, fp, sizeof(frame)) != 0) {
498 /*
499 * Process has trashed its stack; give it an illegal
500 * instruction to halt it in its tracks.
501 */
502 PROC_LOCK(p);
503 sigexit(td, SIGILL);
504 }
505
506 /*
507 * Build context to run handler in.
508 */
509 regs->tf_esp = (int)fp;
510 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
511 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
512 regs->tf_cs = _ucodesel;
513 regs->tf_ds = _udatasel;
514 regs->tf_es = _udatasel;
515 regs->tf_fs = _udatasel;
516 regs->tf_ss = _udatasel;
517 PROC_LOCK(p);
518 mtx_lock(&psp->ps_mtx);
519}
520
521/*
522 * System call to cleanup state after a signal
523 * has been taken. Reset signal mask and
524 * stack state from context left by sendsig (above).
525 * Return to previous pc and psl as specified by
526 * context left by sendsig. Check carefully to
527 * make sure that the user has not modified the
528 * psl to gain improper privileges or to cause
529 * a machine fault.
530 */
531int
532linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
533{
534 struct proc *p = td->td_proc;
535 struct l_sigframe frame;
536 struct trapframe *regs;
537 l_sigset_t lmask;
538 int eflags, i;
539 ksiginfo_t ksi;
540
541 regs = td->td_frame;
542
543#ifdef DEBUG
544 if (ldebug(sigreturn))
545 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
546#endif
547 /*
548 * The trampoline code hands us the sigframe.
549 * It is unsafe to keep track of it ourselves, in the event that a
550 * program jumps out of a signal handler.
551 */
552 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
553 return (EFAULT);
554
555 /*
556 * Check for security violations.
557 */
558#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
559 eflags = frame.sf_sc.sc_eflags;
560 /*
561 * XXX do allow users to change the privileged flag PSL_RF. The
562 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
563 * sometimes set it there too. tf_eflags is kept in the signal
564 * context during signal handling and there is no other place
565 * to remember it, so the PSL_RF bit may be corrupted by the
566 * signal handler without us knowing. Corruption of the PSL_RF
567 * bit at worst causes one more or one less debugger trap, so
568 * allowing it is fairly harmless.
569 */
570 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
571 return(EINVAL);
572
573 /*
574 * Don't allow users to load a valid privileged %cs. Let the
575 * hardware check for invalid selectors, excess privilege in
576 * other selectors, invalid %eip's and invalid %esp's.
577 */
578#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
579 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
580 ksiginfo_init_trap(&ksi);
581 ksi.ksi_signo = SIGBUS;
582 ksi.ksi_code = BUS_OBJERR;
583 ksi.ksi_trapno = T_PROTFLT;
584 ksi.ksi_addr = (void *)regs->tf_eip;
585 trapsignal(td, &ksi);
586 return(EINVAL);
587 }
588
589 lmask.__bits[0] = frame.sf_sc.sc_mask;
590 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
591 lmask.__bits[i+1] = frame.sf_extramask[i];
592 PROC_LOCK(p);
593 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
594 SIG_CANTMASK(td->td_sigmask);
595 signotify(td);
596 PROC_UNLOCK(p);
597
598 /*
599 * Restore signal context.
600 */
601 /* %gs was restored by the trampoline. */
602 regs->tf_fs = frame.sf_sc.sc_fs;
603 regs->tf_es = frame.sf_sc.sc_es;
604 regs->tf_ds = frame.sf_sc.sc_ds;
605 regs->tf_edi = frame.sf_sc.sc_edi;
606 regs->tf_esi = frame.sf_sc.sc_esi;
607 regs->tf_ebp = frame.sf_sc.sc_ebp;
608 regs->tf_ebx = frame.sf_sc.sc_ebx;
609 regs->tf_edx = frame.sf_sc.sc_edx;
610 regs->tf_ecx = frame.sf_sc.sc_ecx;
611 regs->tf_eax = frame.sf_sc.sc_eax;
612 regs->tf_eip = frame.sf_sc.sc_eip;
613 regs->tf_cs = frame.sf_sc.sc_cs;
614 regs->tf_eflags = eflags;
615 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
616 regs->tf_ss = frame.sf_sc.sc_ss;
617
618 return (EJUSTRETURN);
619}
620
621/*
622 * System call to cleanup state after a signal
623 * has been taken. Reset signal mask and
624 * stack state from context left by rt_sendsig (above).
625 * Return to previous pc and psl as specified by
626 * context left by sendsig. Check carefully to
627 * make sure that the user has not modified the
628 * psl to gain improper privileges or to cause
629 * a machine fault.
630 */
631int
632linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
633{
634 struct proc *p = td->td_proc;
635 struct l_ucontext uc;
636 struct l_sigcontext *context;
637 l_stack_t *lss;
638 stack_t ss;
639 struct trapframe *regs;
640 int eflags;
641 ksiginfo_t ksi;
642
643 regs = td->td_frame;
644
645#ifdef DEBUG
646 if (ldebug(rt_sigreturn))
647 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
648#endif
649 /*
650 * The trampoline code hands us the ucontext.
651 * It is unsafe to keep track of it ourselves, in the event that a
652 * program jumps out of a signal handler.
653 */
654 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
655 return (EFAULT);
656
657 context = &uc.uc_mcontext;
658
659 /*
660 * Check for security violations.
661 */
662#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
663 eflags = context->sc_eflags;
664 /*
665 * XXX do allow users to change the privileged flag PSL_RF. The
666 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
667 * sometimes set it there too. tf_eflags is kept in the signal
668 * context during signal handling and there is no other place
669 * to remember it, so the PSL_RF bit may be corrupted by the
670 * signal handler without us knowing. Corruption of the PSL_RF
671 * bit at worst causes one more or one less debugger trap, so
672 * allowing it is fairly harmless.
673 */
674 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
675 return(EINVAL);
676
677 /*
678 * Don't allow users to load a valid privileged %cs. Let the
679 * hardware check for invalid selectors, excess privilege in
680 * other selectors, invalid %eip's and invalid %esp's.
681 */
682#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
683 if (!CS_SECURE(context->sc_cs)) {
684 ksiginfo_init_trap(&ksi);
685 ksi.ksi_signo = SIGBUS;
686 ksi.ksi_code = BUS_OBJERR;
687 ksi.ksi_trapno = T_PROTFLT;
688 ksi.ksi_addr = (void *)regs->tf_eip;
689 trapsignal(td, &ksi);
690 return(EINVAL);
691 }
692
693 PROC_LOCK(p);
694 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
695 SIG_CANTMASK(td->td_sigmask);
696 signotify(td);
697 PROC_UNLOCK(p);
698
699 /*
700 * Restore signal context
701 */
702 /* %gs was restored by the trampoline. */
703 regs->tf_fs = context->sc_fs;
704 regs->tf_es = context->sc_es;
705 regs->tf_ds = context->sc_ds;
706 regs->tf_edi = context->sc_edi;
707 regs->tf_esi = context->sc_esi;
708 regs->tf_ebp = context->sc_ebp;
709 regs->tf_ebx = context->sc_ebx;
710 regs->tf_edx = context->sc_edx;
711 regs->tf_ecx = context->sc_ecx;
712 regs->tf_eax = context->sc_eax;
713 regs->tf_eip = context->sc_eip;
714 regs->tf_cs = context->sc_cs;
715 regs->tf_eflags = eflags;
716 regs->tf_esp = context->sc_esp_at_signal;
717 regs->tf_ss = context->sc_ss;
718
719 /*
720 * call sigaltstack & ignore results..
721 */
722 lss = &uc.uc_stack;
723 ss.ss_sp = lss->ss_sp;
724 ss.ss_size = lss->ss_size;
725 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
726
727#ifdef DEBUG
728 if (ldebug(rt_sigreturn))
729 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
730 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
731#endif
732 (void)kern_sigaltstack(td, &ss, NULL);
733
734 return (EJUSTRETURN);
735}
736
737/*
738 * MPSAFE
739 */
740static void
741linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
742{
743 args[0] = tf->tf_ebx;
744 args[1] = tf->tf_ecx;
745 args[2] = tf->tf_edx;
746 args[3] = tf->tf_esi;
747 args[4] = tf->tf_edi;
748 args[5] = tf->tf_ebp; /* Unconfirmed */
749 *params = NULL; /* no copyin */
750}
751
752/*
753 * If a linux binary is exec'ing something, try this image activator
754 * first. We override standard shell script execution in order to
755 * be able to modify the interpreter path. We only do this if a linux
756 * binary is doing the exec, so we do not create an EXEC module for it.
757 */
758static int exec_linux_imgact_try(struct image_params *iparams);
759
760static int
761exec_linux_imgact_try(struct image_params *imgp)
762{
763 const char *head = (const char *)imgp->image_header;
764 char *rpath;
765 int error = -1, len;
766
767 /*
768 * The interpreter for shell scripts run from a linux binary needs
769 * to be located in /compat/linux if possible in order to recursively
770 * maintain linux path emulation.
771 */
772 if (((const short *)head)[0] == SHELLMAGIC) {
773 /*
774 * Run our normal shell image activator. If it succeeds attempt
775 * to use the alternate path for the interpreter. If an alternate
776 * path is found, use our stringspace to store it.
777 */
778 if ((error = exec_shell_imgact(imgp)) == 0) {
779 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
780 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
781 if (rpath != NULL) {
782 len = strlen(rpath) + 1;
783
784 if (len <= MAXSHELLCMDLEN) {
785 memcpy(imgp->interpreter_name, rpath, len);
786 }
787 free(rpath, M_TEMP);
788 }
789 }
790 }
791 return(error);
792}
793
794/*
795 * exec_setregs may initialize some registers differently than Linux
796 * does, thus potentially confusing Linux binaries. If necessary, we
797 * override the exec_setregs default(s) here.
798 */
799static void
800exec_linux_setregs(struct thread *td, u_long entry,
801 u_long stack, u_long ps_strings)
802{
803 static const u_short control = __LINUX_NPXCW__;
804 struct pcb *pcb = td->td_pcb;
805
806 exec_setregs(td, entry, stack, ps_strings);
807
808 /* Linux sets %gs to 0, we default to _udatasel */
809 pcb->pcb_gs = 0; load_gs(0);
810
811 /* Linux sets the i387 to extended precision. */
812 fldcw(&control);
813}
814
815struct sysentvec linux_sysvec = {
816 .sv_size = LINUX_SYS_MAXSYSCALL,
817 .sv_table = linux_sysent,
818 .sv_mask = 0,
819 .sv_sigsize = LINUX_SIGTBLSZ,
820 .sv_sigtbl = bsd_to_linux_signal,
821 .sv_errsize = ELAST + 1,
822 .sv_errtbl = bsd_to_linux_errno,
823 .sv_transtrap = translate_traps,
824 .sv_fixup = linux_fixup,
825 .sv_sendsig = linux_sendsig,
826 .sv_sigcode = linux_sigcode,
827 .sv_szsigcode = &linux_szsigcode,
828 .sv_prepsyscall = linux_prepsyscall,
829 .sv_name = "Linux a.out",
830 .sv_coredump = NULL,
831 .sv_imgact_try = exec_linux_imgact_try,
832 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
833 .sv_pagesize = PAGE_SIZE,
834 .sv_minuser = VM_MIN_ADDRESS,
835 .sv_maxuser = VM_MAXUSER_ADDRESS,
836 .sv_usrstack = USRSTACK,
837 .sv_psstrings = PS_STRINGS,
838 .sv_stackprot = VM_PROT_ALL,
839 .sv_copyout_strings = exec_copyout_strings,
840 .sv_setregs = exec_linux_setregs,
841 .sv_fixlimit = NULL,
842 .sv_maxssiz = NULL
843};
844
845struct sysentvec elf_linux_sysvec = {
846 .sv_size = LINUX_SYS_MAXSYSCALL,
847 .sv_table = linux_sysent,
848 .sv_mask = 0,
849 .sv_sigsize = LINUX_SIGTBLSZ,
850 .sv_sigtbl = bsd_to_linux_signal,
851 .sv_errsize = ELAST + 1,
852 .sv_errtbl = bsd_to_linux_errno,
853 .sv_transtrap = translate_traps,
854 .sv_fixup = elf_linux_fixup,
855 .sv_sendsig = linux_sendsig,
856 .sv_sigcode = linux_sigcode,
857 .sv_szsigcode = &linux_szsigcode,
858 .sv_prepsyscall = linux_prepsyscall,
859 .sv_name = "Linux ELF",
860 .sv_coredump = elf32_coredump,
861 .sv_imgact_try = exec_linux_imgact_try,
862 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
863 .sv_pagesize = PAGE_SIZE,
864 .sv_minuser = VM_MIN_ADDRESS,
865 .sv_maxuser = VM_MAXUSER_ADDRESS,
866 .sv_usrstack = USRSTACK,
867 .sv_psstrings = PS_STRINGS,
868 .sv_stackprot = VM_PROT_ALL,
869 .sv_copyout_strings = exec_copyout_strings,
870 .sv_setregs = exec_linux_setregs,
871 .sv_fixlimit = NULL,
872 .sv_maxssiz = NULL
873};
874
875static Elf32_Brandinfo linux_brand = {
876 .brand = ELFOSABI_LINUX,
877 .machine = EM_386,
878 .compat_3_brand = "Linux",
879 .emul_path = "/compat/linux",
880 .interp_path = "/lib/ld-linux.so.1",
881 .sysvec = &elf_linux_sysvec,
882 .interp_newpath = NULL,
883 .flags = BI_CAN_EXEC_DYN,
884};
885
886static Elf32_Brandinfo linux_glibc2brand = {
887 .brand = ELFOSABI_LINUX,
888 .machine = EM_386,
889 .compat_3_brand = "Linux",
890 .emul_path = "/compat/linux",
891 .interp_path = "/lib/ld-linux.so.2",
892 .sysvec = &elf_linux_sysvec,
893 .interp_newpath = NULL,
894 .flags = BI_CAN_EXEC_DYN,
895};
896
897Elf32_Brandinfo *linux_brandlist[] = {
898 &linux_brand,
899 &linux_glibc2brand,
900 NULL
901};
902
903static int
904linux_elf_modevent(module_t mod, int type, void *data)
905{
906 Elf32_Brandinfo **brandinfo;
907 int error;
908 struct linux_ioctl_handler **lihp;
909 struct linux_device_handler **ldhp;
910
911 error = 0;
912
913 switch(type) {
914 case MOD_LOAD:
915 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
916 ++brandinfo)
917 if (elf32_insert_brand_entry(*brandinfo) < 0)
918 error = EINVAL;
919 if (error == 0) {
920 SET_FOREACH(lihp, linux_ioctl_handler_set)
921 linux_ioctl_register_handler(*lihp);
922 SET_FOREACH(ldhp, linux_device_handler_set)
923 linux_device_register_handler(*ldhp);
924 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
925 sx_init(&emul_shared_lock, "emuldata->shared lock");
926 LIST_INIT(&futex_list);
927 sx_init(&futex_sx, "futex protection lock");
928 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
929 NULL, 1000);
930 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
931 NULL, 1000);
932 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
933 NULL, 1000);
934 if (bootverbose)
935 printf("Linux ELF exec handler installed\n");
936 } else
937 printf("cannot insert Linux ELF brand handler\n");
938 break;
939 case MOD_UNLOAD:
940 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
941 ++brandinfo)
942 if (elf32_brand_inuse(*brandinfo))
943 error = EBUSY;
944 if (error == 0) {
945 for (brandinfo = &linux_brandlist[0];
946 *brandinfo != NULL; ++brandinfo)
947 if (elf32_remove_brand_entry(*brandinfo) < 0)
948 error = EINVAL;
949 }
950 if (error == 0) {
951 SET_FOREACH(lihp, linux_ioctl_handler_set)
952 linux_ioctl_unregister_handler(*lihp);
953 SET_FOREACH(ldhp, linux_device_handler_set)
954 linux_device_unregister_handler(*ldhp);
955 mtx_destroy(&emul_lock);
956 sx_destroy(&emul_shared_lock);
957 sx_destroy(&futex_sx);
958 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
959 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
960 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
961 if (bootverbose)
962 printf("Linux ELF exec handler removed\n");
963 } else
964 printf("Could not deinstall ELF interpreter entry\n");
965 break;
966 default:
967 return EOPNOTSUPP;
968 }
969 return error;
970}
971
972static moduledata_t linux_elf_mod = {
973 "linuxelf",
974 linux_elf_modevent,
975 0
976};
977
978DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);