linux_sysvec.c revision 140992
1/*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 140992 2005-01-29 23:12:00Z sobomax $"); 31 32/* XXX we use functions that might not exist. */ 33#include "opt_compat.h" 34 35#ifndef COMPAT_43 36#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 37#endif 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/exec.h> 42#include <sys/imgact.h> 43#include <sys/imgact_aout.h> 44#include <sys/imgact_elf.h> 45#include <sys/kernel.h> 46#include <sys/lock.h> 47#include <sys/malloc.h> 48#include <sys/module.h> 49#include <sys/mutex.h> 50#include <sys/proc.h> 51#include <sys/signalvar.h> 52#include <sys/syscallsubr.h> 53#include <sys/sysent.h> 54#include <sys/sysproto.h> 55#include <sys/vnode.h> 56 57#include <vm/vm.h> 58#include <vm/pmap.h> 59#include <vm/vm_extern.h> 60#include <vm/vm_map.h> 61#include <vm/vm_object.h> 62#include <vm/vm_page.h> 63#include <vm/vm_param.h> 64 65#include <machine/cpu.h> 66#include <machine/md_var.h> 67#include <machine/pcb.h> 68 69#include <i386/linux/linux.h> 70#include <i386/linux/linux_proto.h> 71#include <compat/linux/linux_mib.h> 72#include <compat/linux/linux_signal.h> 73#include <compat/linux/linux_util.h> 74 75MODULE_VERSION(linux, 1); 76MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 77MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 78MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 79 80MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 81 82#if BYTE_ORDER == LITTLE_ENDIAN 83#define SHELLMAGIC 0x2123 /* #! */ 84#else 85#define SHELLMAGIC 0x2321 86#endif 87 88/* 89 * Allow the sendsig functions to use the ldebug() facility 90 * even though they are not syscalls themselves. Map them 91 * to syscall 0. This is slightly less bogus than using 92 * ldebug(sigreturn). 93 */ 94#define LINUX_SYS_linux_rt_sendsig 0 95#define LINUX_SYS_linux_sendsig 0 96 97extern char linux_sigcode[]; 98extern int linux_szsigcode; 99 100extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 101 102SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 103 104static int linux_fixup(register_t **stack_base, 105 struct image_params *iparams); 106static int elf_linux_fixup(register_t **stack_base, 107 struct image_params *iparams); 108static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 109 caddr_t *params); 110static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 111 u_long code); 112static void exec_linux_setregs(struct thread *td, u_long entry, 113 u_long stack, u_long ps_strings); 114 115/* 116 * Linux syscalls return negative errno's, we do positive and map them 117 */ 118static int bsd_to_linux_errno[ELAST + 1] = { 119 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 120 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 121 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 122 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 123 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 124 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 125 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 126 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 127 -6, -6, -43, -42, -75, -6, -84 128}; 129 130int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 131 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 132 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 133 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 134 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 135 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 136 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 137 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 138 0, LINUX_SIGUSR1, LINUX_SIGUSR2 139}; 140 141int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 142 SIGHUP, SIGINT, SIGQUIT, SIGILL, 143 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 144 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 145 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 146 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 147 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 148 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 149 SIGIO, SIGURG, SIGSYS 150}; 151 152#define LINUX_T_UNKNOWN 255 153static int _bsd_to_linux_trapcode[] = { 154 LINUX_T_UNKNOWN, /* 0 */ 155 6, /* 1 T_PRIVINFLT */ 156 LINUX_T_UNKNOWN, /* 2 */ 157 3, /* 3 T_BPTFLT */ 158 LINUX_T_UNKNOWN, /* 4 */ 159 LINUX_T_UNKNOWN, /* 5 */ 160 16, /* 6 T_ARITHTRAP */ 161 254, /* 7 T_ASTFLT */ 162 LINUX_T_UNKNOWN, /* 8 */ 163 13, /* 9 T_PROTFLT */ 164 1, /* 10 T_TRCTRAP */ 165 LINUX_T_UNKNOWN, /* 11 */ 166 14, /* 12 T_PAGEFLT */ 167 LINUX_T_UNKNOWN, /* 13 */ 168 17, /* 14 T_ALIGNFLT */ 169 LINUX_T_UNKNOWN, /* 15 */ 170 LINUX_T_UNKNOWN, /* 16 */ 171 LINUX_T_UNKNOWN, /* 17 */ 172 0, /* 18 T_DIVIDE */ 173 2, /* 19 T_NMI */ 174 4, /* 20 T_OFLOW */ 175 5, /* 21 T_BOUND */ 176 7, /* 22 T_DNA */ 177 8, /* 23 T_DOUBLEFLT */ 178 9, /* 24 T_FPOPFLT */ 179 10, /* 25 T_TSSFLT */ 180 11, /* 26 T_SEGNPFLT */ 181 12, /* 27 T_STKFLT */ 182 18, /* 28 T_MCHK */ 183 19, /* 29 T_XMMFLT */ 184 15 /* 30 T_RESERVED */ 185}; 186#define bsd_to_linux_trapcode(code) \ 187 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 188 _bsd_to_linux_trapcode[(code)]: \ 189 LINUX_T_UNKNOWN) 190 191/* 192 * If FreeBSD & Linux have a difference of opinion about what a trap 193 * means, deal with it here. 194 * 195 * MPSAFE 196 */ 197static int 198translate_traps(int signal, int trap_code) 199{ 200 if (signal != SIGBUS) 201 return signal; 202 switch (trap_code) { 203 case T_PROTFLT: 204 case T_TSSFLT: 205 case T_DOUBLEFLT: 206 case T_PAGEFLT: 207 return SIGSEGV; 208 default: 209 return signal; 210 } 211} 212 213static int 214linux_fixup(register_t **stack_base, struct image_params *imgp) 215{ 216 register_t *argv, *envp; 217 218 argv = *stack_base; 219 envp = *stack_base + (imgp->args->argc + 1); 220 (*stack_base)--; 221 **stack_base = (intptr_t)(void *)envp; 222 (*stack_base)--; 223 **stack_base = (intptr_t)(void *)argv; 224 (*stack_base)--; 225 **stack_base = imgp->args->argc; 226 return 0; 227} 228 229static int 230elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 231{ 232 Elf32_Auxargs *args; 233 register_t *pos; 234 235 KASSERT(curthread->td_proc == imgp->proc && 236 (curthread->td_proc->p_flag & P_SA) == 0, 237 ("unsafe elf_linux_fixup(), should be curproc")); 238 args = (Elf32_Auxargs *)imgp->auxargs; 239 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 240 241 if (args->trace) 242 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 243 if (args->execfd != -1) 244 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 245 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 246 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 247 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 248 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 249 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 250 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 251 AUXARGS_ENTRY(pos, AT_BASE, args->base); 252 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 253 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 254 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 255 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 256 AUXARGS_ENTRY(pos, AT_NULL, 0); 257 258 free(imgp->auxargs, M_TEMP); 259 imgp->auxargs = NULL; 260 261 (*stack_base)--; 262 **stack_base = (register_t)imgp->args->argc; 263 return 0; 264} 265 266extern int _ucodesel, _udatasel; 267extern unsigned long linux_sznonrtsigcode; 268 269static void 270linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 271{ 272 struct thread *td = curthread; 273 struct proc *p = td->td_proc; 274 struct sigacts *psp; 275 struct trapframe *regs; 276 struct l_rt_sigframe *fp, frame; 277 int oonstack; 278 279 PROC_LOCK_ASSERT(p, MA_OWNED); 280 psp = p->p_sigacts; 281 mtx_assert(&psp->ps_mtx, MA_OWNED); 282 regs = td->td_frame; 283 oonstack = sigonstack(regs->tf_esp); 284 285#ifdef DEBUG 286 if (ldebug(rt_sendsig)) 287 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 288 catcher, sig, (void*)mask, code); 289#endif 290 /* 291 * Allocate space for the signal handler context. 292 */ 293 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 294 SIGISMEMBER(psp->ps_sigonstack, sig)) { 295 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 296 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 297 } else 298 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 299 mtx_unlock(&psp->ps_mtx); 300 301 /* 302 * Build the argument list for the signal handler. 303 */ 304 if (p->p_sysent->sv_sigtbl) 305 if (sig <= p->p_sysent->sv_sigsize) 306 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 307 308 bzero(&frame, sizeof(frame)); 309 310 frame.sf_handler = catcher; 311 frame.sf_sig = sig; 312 frame.sf_siginfo = &fp->sf_si; 313 frame.sf_ucontext = &fp->sf_sc; 314 315 /* Fill in POSIX parts */ 316 frame.sf_si.lsi_signo = sig; 317 frame.sf_si.lsi_code = code; 318 frame.sf_si.lsi_addr = (void *)regs->tf_err; 319 320 /* 321 * Build the signal context to be used by sigreturn. 322 */ 323 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 324 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 325 326 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 327 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 328 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 329 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 330 PROC_UNLOCK(p); 331 332 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 333 334 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 335 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 336 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 337 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 338 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 339 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 340 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 341 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 342 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 343 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 344 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 345 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 346 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 347 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 348 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 349 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 350 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 351 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 352 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 353 354#ifdef DEBUG 355 if (ldebug(rt_sendsig)) 356 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 357 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 358 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 359#endif 360 361 if (copyout(&frame, fp, sizeof(frame)) != 0) { 362 /* 363 * Process has trashed its stack; give it an illegal 364 * instruction to halt it in its tracks. 365 */ 366#ifdef DEBUG 367 if (ldebug(rt_sendsig)) 368 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 369 fp, oonstack); 370#endif 371 PROC_LOCK(p); 372 sigexit(td, SIGILL); 373 } 374 375 /* 376 * Build context to run handler in. 377 */ 378 regs->tf_esp = (int)fp; 379 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 380 linux_sznonrtsigcode; 381 regs->tf_eflags &= ~(PSL_T | PSL_VM); 382 regs->tf_cs = _ucodesel; 383 regs->tf_ds = _udatasel; 384 regs->tf_es = _udatasel; 385 regs->tf_fs = _udatasel; 386 regs->tf_ss = _udatasel; 387 PROC_LOCK(p); 388 mtx_lock(&psp->ps_mtx); 389} 390 391 392/* 393 * Send an interrupt to process. 394 * 395 * Stack is set up to allow sigcode stored 396 * in u. to call routine, followed by kcall 397 * to sigreturn routine below. After sigreturn 398 * resets the signal mask, the stack, and the 399 * frame pointer, it returns to the user 400 * specified pc, psl. 401 */ 402static void 403linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 404{ 405 struct thread *td = curthread; 406 struct proc *p = td->td_proc; 407 struct sigacts *psp; 408 struct trapframe *regs; 409 struct l_sigframe *fp, frame; 410 l_sigset_t lmask; 411 int oonstack, i; 412 413 PROC_LOCK_ASSERT(p, MA_OWNED); 414 psp = p->p_sigacts; 415 mtx_assert(&psp->ps_mtx, MA_OWNED); 416 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 417 /* Signal handler installed with SA_SIGINFO. */ 418 linux_rt_sendsig(catcher, sig, mask, code); 419 return; 420 } 421 422 regs = td->td_frame; 423 oonstack = sigonstack(regs->tf_esp); 424 425#ifdef DEBUG 426 if (ldebug(sendsig)) 427 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 428 catcher, sig, (void*)mask, code); 429#endif 430 431 /* 432 * Allocate space for the signal handler context. 433 */ 434 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 435 SIGISMEMBER(psp->ps_sigonstack, sig)) { 436 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 437 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 438 } else 439 fp = (struct l_sigframe *)regs->tf_esp - 1; 440 mtx_unlock(&psp->ps_mtx); 441 PROC_UNLOCK(p); 442 443 /* 444 * Build the argument list for the signal handler. 445 */ 446 if (p->p_sysent->sv_sigtbl) 447 if (sig <= p->p_sysent->sv_sigsize) 448 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 449 450 bzero(&frame, sizeof(frame)); 451 452 frame.sf_handler = catcher; 453 frame.sf_sig = sig; 454 455 bsd_to_linux_sigset(mask, &lmask); 456 457 /* 458 * Build the signal context to be used by sigreturn. 459 */ 460 frame.sf_sc.sc_mask = lmask.__bits[0]; 461 frame.sf_sc.sc_gs = rgs(); 462 frame.sf_sc.sc_fs = regs->tf_fs; 463 frame.sf_sc.sc_es = regs->tf_es; 464 frame.sf_sc.sc_ds = regs->tf_ds; 465 frame.sf_sc.sc_edi = regs->tf_edi; 466 frame.sf_sc.sc_esi = regs->tf_esi; 467 frame.sf_sc.sc_ebp = regs->tf_ebp; 468 frame.sf_sc.sc_ebx = regs->tf_ebx; 469 frame.sf_sc.sc_edx = regs->tf_edx; 470 frame.sf_sc.sc_ecx = regs->tf_ecx; 471 frame.sf_sc.sc_eax = regs->tf_eax; 472 frame.sf_sc.sc_eip = regs->tf_eip; 473 frame.sf_sc.sc_cs = regs->tf_cs; 474 frame.sf_sc.sc_eflags = regs->tf_eflags; 475 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 476 frame.sf_sc.sc_ss = regs->tf_ss; 477 frame.sf_sc.sc_err = regs->tf_err; 478 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 479 480 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 481 frame.sf_extramask[i] = lmask.__bits[i+1]; 482 483 if (copyout(&frame, fp, sizeof(frame)) != 0) { 484 /* 485 * Process has trashed its stack; give it an illegal 486 * instruction to halt it in its tracks. 487 */ 488 PROC_LOCK(p); 489 sigexit(td, SIGILL); 490 } 491 492 /* 493 * Build context to run handler in. 494 */ 495 regs->tf_esp = (int)fp; 496 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 497 regs->tf_eflags &= ~(PSL_T | PSL_VM); 498 regs->tf_cs = _ucodesel; 499 regs->tf_ds = _udatasel; 500 regs->tf_es = _udatasel; 501 regs->tf_fs = _udatasel; 502 regs->tf_ss = _udatasel; 503 PROC_LOCK(p); 504 mtx_lock(&psp->ps_mtx); 505} 506 507/* 508 * System call to cleanup state after a signal 509 * has been taken. Reset signal mask and 510 * stack state from context left by sendsig (above). 511 * Return to previous pc and psl as specified by 512 * context left by sendsig. Check carefully to 513 * make sure that the user has not modified the 514 * psl to gain improper privileges or to cause 515 * a machine fault. 516 */ 517int 518linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 519{ 520 struct proc *p = td->td_proc; 521 struct l_sigframe frame; 522 struct trapframe *regs; 523 l_sigset_t lmask; 524 int eflags, i; 525 526 regs = td->td_frame; 527 528#ifdef DEBUG 529 if (ldebug(sigreturn)) 530 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 531#endif 532 /* 533 * The trampoline code hands us the sigframe. 534 * It is unsafe to keep track of it ourselves, in the event that a 535 * program jumps out of a signal handler. 536 */ 537 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 538 return (EFAULT); 539 540 /* 541 * Check for security violations. 542 */ 543#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 544 eflags = frame.sf_sc.sc_eflags; 545 /* 546 * XXX do allow users to change the privileged flag PSL_RF. The 547 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 548 * sometimes set it there too. tf_eflags is kept in the signal 549 * context during signal handling and there is no other place 550 * to remember it, so the PSL_RF bit may be corrupted by the 551 * signal handler without us knowing. Corruption of the PSL_RF 552 * bit at worst causes one more or one less debugger trap, so 553 * allowing it is fairly harmless. 554 */ 555 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 556 return(EINVAL); 557 558 /* 559 * Don't allow users to load a valid privileged %cs. Let the 560 * hardware check for invalid selectors, excess privilege in 561 * other selectors, invalid %eip's and invalid %esp's. 562 */ 563#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 564 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 565 trapsignal(td, SIGBUS, T_PROTFLT); 566 return(EINVAL); 567 } 568 569 lmask.__bits[0] = frame.sf_sc.sc_mask; 570 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 571 lmask.__bits[i+1] = frame.sf_extramask[i]; 572 PROC_LOCK(p); 573 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 574 SIG_CANTMASK(td->td_sigmask); 575 signotify(td); 576 PROC_UNLOCK(p); 577 578 /* 579 * Restore signal context. 580 */ 581 /* %gs was restored by the trampoline. */ 582 regs->tf_fs = frame.sf_sc.sc_fs; 583 regs->tf_es = frame.sf_sc.sc_es; 584 regs->tf_ds = frame.sf_sc.sc_ds; 585 regs->tf_edi = frame.sf_sc.sc_edi; 586 regs->tf_esi = frame.sf_sc.sc_esi; 587 regs->tf_ebp = frame.sf_sc.sc_ebp; 588 regs->tf_ebx = frame.sf_sc.sc_ebx; 589 regs->tf_edx = frame.sf_sc.sc_edx; 590 regs->tf_ecx = frame.sf_sc.sc_ecx; 591 regs->tf_eax = frame.sf_sc.sc_eax; 592 regs->tf_eip = frame.sf_sc.sc_eip; 593 regs->tf_cs = frame.sf_sc.sc_cs; 594 regs->tf_eflags = eflags; 595 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 596 regs->tf_ss = frame.sf_sc.sc_ss; 597 598 return (EJUSTRETURN); 599} 600 601/* 602 * System call to cleanup state after a signal 603 * has been taken. Reset signal mask and 604 * stack state from context left by rt_sendsig (above). 605 * Return to previous pc and psl as specified by 606 * context left by sendsig. Check carefully to 607 * make sure that the user has not modified the 608 * psl to gain improper privileges or to cause 609 * a machine fault. 610 */ 611int 612linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 613{ 614 struct proc *p = td->td_proc; 615 struct l_ucontext uc; 616 struct l_sigcontext *context; 617 l_stack_t *lss; 618 stack_t ss; 619 struct trapframe *regs; 620 int eflags; 621 622 regs = td->td_frame; 623 624#ifdef DEBUG 625 if (ldebug(rt_sigreturn)) 626 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 627#endif 628 /* 629 * The trampoline code hands us the ucontext. 630 * It is unsafe to keep track of it ourselves, in the event that a 631 * program jumps out of a signal handler. 632 */ 633 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 634 return (EFAULT); 635 636 context = &uc.uc_mcontext; 637 638 /* 639 * Check for security violations. 640 */ 641#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 642 eflags = context->sc_eflags; 643 /* 644 * XXX do allow users to change the privileged flag PSL_RF. The 645 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 646 * sometimes set it there too. tf_eflags is kept in the signal 647 * context during signal handling and there is no other place 648 * to remember it, so the PSL_RF bit may be corrupted by the 649 * signal handler without us knowing. Corruption of the PSL_RF 650 * bit at worst causes one more or one less debugger trap, so 651 * allowing it is fairly harmless. 652 */ 653 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 654 return(EINVAL); 655 656 /* 657 * Don't allow users to load a valid privileged %cs. Let the 658 * hardware check for invalid selectors, excess privilege in 659 * other selectors, invalid %eip's and invalid %esp's. 660 */ 661#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 662 if (!CS_SECURE(context->sc_cs)) { 663 trapsignal(td, SIGBUS, T_PROTFLT); 664 return(EINVAL); 665 } 666 667 PROC_LOCK(p); 668 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 669 SIG_CANTMASK(td->td_sigmask); 670 signotify(td); 671 PROC_UNLOCK(p); 672 673 /* 674 * Restore signal context 675 */ 676 /* %gs was restored by the trampoline. */ 677 regs->tf_fs = context->sc_fs; 678 regs->tf_es = context->sc_es; 679 regs->tf_ds = context->sc_ds; 680 regs->tf_edi = context->sc_edi; 681 regs->tf_esi = context->sc_esi; 682 regs->tf_ebp = context->sc_ebp; 683 regs->tf_ebx = context->sc_ebx; 684 regs->tf_edx = context->sc_edx; 685 regs->tf_ecx = context->sc_ecx; 686 regs->tf_eax = context->sc_eax; 687 regs->tf_eip = context->sc_eip; 688 regs->tf_cs = context->sc_cs; 689 regs->tf_eflags = eflags; 690 regs->tf_esp = context->sc_esp_at_signal; 691 regs->tf_ss = context->sc_ss; 692 693 /* 694 * call sigaltstack & ignore results.. 695 */ 696 lss = &uc.uc_stack; 697 ss.ss_sp = lss->ss_sp; 698 ss.ss_size = lss->ss_size; 699 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 700 701#ifdef DEBUG 702 if (ldebug(rt_sigreturn)) 703 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 704 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 705#endif 706 (void)kern_sigaltstack(td, &ss, NULL); 707 708 return (EJUSTRETURN); 709} 710 711/* 712 * MPSAFE 713 */ 714static void 715linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 716{ 717 args[0] = tf->tf_ebx; 718 args[1] = tf->tf_ecx; 719 args[2] = tf->tf_edx; 720 args[3] = tf->tf_esi; 721 args[4] = tf->tf_edi; 722 args[5] = tf->tf_ebp; /* Unconfirmed */ 723 *params = NULL; /* no copyin */ 724} 725 726/* 727 * If a linux binary is exec'ing something, try this image activator 728 * first. We override standard shell script execution in order to 729 * be able to modify the interpreter path. We only do this if a linux 730 * binary is doing the exec, so we do not create an EXEC module for it. 731 */ 732static int exec_linux_imgact_try(struct image_params *iparams); 733 734static int 735exec_linux_imgact_try(struct image_params *imgp) 736{ 737 const char *head = (const char *)imgp->image_header; 738 int error = -1; 739 740 /* 741 * The interpreter for shell scripts run from a linux binary needs 742 * to be located in /compat/linux if possible in order to recursively 743 * maintain linux path emulation. 744 */ 745 if (((const short *)head)[0] == SHELLMAGIC) { 746 /* 747 * Run our normal shell image activator. If it succeeds attempt 748 * to use the alternate path for the interpreter. If an alternate 749 * path is found, use our stringspace to store it. 750 */ 751 if ((error = exec_shell_imgact(imgp)) == 0) { 752 char *rpath = NULL; 753 754 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 755 imgp->interpreter_name, &rpath, 0); 756 if (rpath != imgp->interpreter_name) { 757 int len = strlen(rpath) + 1; 758 759 if (len <= MAXSHELLCMDLEN) { 760 memcpy(imgp->interpreter_name, rpath, len); 761 } 762 free(rpath, M_TEMP); 763 } 764 } 765 } 766 return(error); 767} 768 769/* 770 * exec_setregs may initialize some registers differently than Linux 771 * does, thus potentially confusing Linux binaries. If necessary, we 772 * override the exec_setregs default(s) here. 773 */ 774static void 775exec_linux_setregs(struct thread *td, u_long entry, 776 u_long stack, u_long ps_strings) 777{ 778 struct pcb *pcb = td->td_pcb; 779 780 exec_setregs(td, entry, stack, ps_strings); 781 782 /* Linux sets %gs to 0, we default to _udatasel */ 783 pcb->pcb_gs = 0; load_gs(0); 784} 785 786struct sysentvec linux_sysvec = { 787 LINUX_SYS_MAXSYSCALL, 788 linux_sysent, 789 0xff, 790 LINUX_SIGTBLSZ, 791 bsd_to_linux_signal, 792 ELAST + 1, 793 bsd_to_linux_errno, 794 translate_traps, 795 linux_fixup, 796 linux_sendsig, 797 linux_sigcode, 798 &linux_szsigcode, 799 linux_prepsyscall, 800 "Linux a.out", 801 NULL, 802 exec_linux_imgact_try, 803 LINUX_MINSIGSTKSZ, 804 PAGE_SIZE, 805 VM_MIN_ADDRESS, 806 VM_MAXUSER_ADDRESS, 807 USRSTACK, 808 PS_STRINGS, 809 VM_PROT_ALL, 810 exec_copyout_strings, 811 exec_linux_setregs, 812 NULL 813}; 814 815struct sysentvec elf_linux_sysvec = { 816 LINUX_SYS_MAXSYSCALL, 817 linux_sysent, 818 0xff, 819 LINUX_SIGTBLSZ, 820 bsd_to_linux_signal, 821 ELAST + 1, 822 bsd_to_linux_errno, 823 translate_traps, 824 elf_linux_fixup, 825 linux_sendsig, 826 linux_sigcode, 827 &linux_szsigcode, 828 linux_prepsyscall, 829 "Linux ELF", 830 elf32_coredump, 831 exec_linux_imgact_try, 832 LINUX_MINSIGSTKSZ, 833 PAGE_SIZE, 834 VM_MIN_ADDRESS, 835 VM_MAXUSER_ADDRESS, 836 USRSTACK, 837 PS_STRINGS, 838 VM_PROT_ALL, 839 exec_copyout_strings, 840 exec_linux_setregs, 841 NULL 842}; 843 844static Elf32_Brandinfo linux_brand = { 845 ELFOSABI_LINUX, 846 EM_386, 847 "Linux", 848 "/compat/linux", 849 "/lib/ld-linux.so.1", 850 &elf_linux_sysvec, 851 NULL, 852 }; 853 854static Elf32_Brandinfo linux_glibc2brand = { 855 ELFOSABI_LINUX, 856 EM_386, 857 "Linux", 858 "/compat/linux", 859 "/lib/ld-linux.so.2", 860 &elf_linux_sysvec, 861 NULL, 862 }; 863 864Elf32_Brandinfo *linux_brandlist[] = { 865 &linux_brand, 866 &linux_glibc2brand, 867 NULL 868 }; 869 870static int 871linux_elf_modevent(module_t mod, int type, void *data) 872{ 873 Elf32_Brandinfo **brandinfo; 874 int error; 875 struct linux_ioctl_handler **lihp; 876 877 error = 0; 878 879 switch(type) { 880 case MOD_LOAD: 881 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 882 ++brandinfo) 883 if (elf32_insert_brand_entry(*brandinfo) < 0) 884 error = EINVAL; 885 if (error == 0) { 886 SET_FOREACH(lihp, linux_ioctl_handler_set) 887 linux_ioctl_register_handler(*lihp); 888 if (bootverbose) 889 printf("Linux ELF exec handler installed\n"); 890 } else 891 printf("cannot insert Linux ELF brand handler\n"); 892 break; 893 case MOD_UNLOAD: 894 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 895 ++brandinfo) 896 if (elf32_brand_inuse(*brandinfo)) 897 error = EBUSY; 898 if (error == 0) { 899 for (brandinfo = &linux_brandlist[0]; 900 *brandinfo != NULL; ++brandinfo) 901 if (elf32_remove_brand_entry(*brandinfo) < 0) 902 error = EINVAL; 903 } 904 if (error == 0) { 905 SET_FOREACH(lihp, linux_ioctl_handler_set) 906 linux_ioctl_unregister_handler(*lihp); 907 if (bootverbose) 908 printf("Linux ELF exec handler removed\n"); 909 linux_mib_destroy(); 910 } else 911 printf("Could not deinstall ELF interpreter entry\n"); 912 break; 913 default: 914 return EOPNOTSUPP; 915 } 916 return error; 917} 918 919static moduledata_t linux_elf_mod = { 920 "linuxelf", 921 linux_elf_modevent, 922 0 923}; 924 925DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 926