124 125static int elf_linux_fixup(register_t **stack_base, 126 struct image_params *iparams); 127static register_t *linux_copyout_strings(struct image_params *imgp); 128static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 129static void exec_linux_setregs(struct thread *td, 130 struct image_params *imgp, u_long stack); 131static void linux32_fixlimit(struct rlimit *rl, int which); 132static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 133static void linux_vdso_install(void *param); 134static void linux_vdso_deinstall(void *param); 135 136static eventhandler_tag linux_exit_tag; 137static eventhandler_tag linux_exec_tag; 138static eventhandler_tag linux_thread_dtor_tag; 139 140/* 141 * Linux syscalls return negative errno's, we do positive and map them 142 * Reference: 143 * FreeBSD: src/sys/sys/errno.h 144 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 145 * linux-2.6.17.8/include/asm-generic/errno.h 146 */ 147static int bsd_to_linux_errno[ELAST + 1] = { 148 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 149 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 150 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 151 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 152 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 153 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 154 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 155 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 156 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 157 -72, -67, -71 158}; 159 160int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 161 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 162 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 163 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 164 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 165 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 166 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 167 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 168 0, LINUX_SIGUSR1, LINUX_SIGUSR2 169}; 170 171int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 172 SIGHUP, SIGINT, SIGQUIT, SIGILL, 173 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 174 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 175 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 176 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 177 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 178 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 179 SIGIO, SIGURG, SIGSYS 180}; 181 182#define LINUX_T_UNKNOWN 255 183static int _bsd_to_linux_trapcode[] = { 184 LINUX_T_UNKNOWN, /* 0 */ 185 6, /* 1 T_PRIVINFLT */ 186 LINUX_T_UNKNOWN, /* 2 */ 187 3, /* 3 T_BPTFLT */ 188 LINUX_T_UNKNOWN, /* 4 */ 189 LINUX_T_UNKNOWN, /* 5 */ 190 16, /* 6 T_ARITHTRAP */ 191 254, /* 7 T_ASTFLT */ 192 LINUX_T_UNKNOWN, /* 8 */ 193 13, /* 9 T_PROTFLT */ 194 1, /* 10 T_TRCTRAP */ 195 LINUX_T_UNKNOWN, /* 11 */ 196 14, /* 12 T_PAGEFLT */ 197 LINUX_T_UNKNOWN, /* 13 */ 198 17, /* 14 T_ALIGNFLT */ 199 LINUX_T_UNKNOWN, /* 15 */ 200 LINUX_T_UNKNOWN, /* 16 */ 201 LINUX_T_UNKNOWN, /* 17 */ 202 0, /* 18 T_DIVIDE */ 203 2, /* 19 T_NMI */ 204 4, /* 20 T_OFLOW */ 205 5, /* 21 T_BOUND */ 206 7, /* 22 T_DNA */ 207 8, /* 23 T_DOUBLEFLT */ 208 9, /* 24 T_FPOPFLT */ 209 10, /* 25 T_TSSFLT */ 210 11, /* 26 T_SEGNPFLT */ 211 12, /* 27 T_STKFLT */ 212 18, /* 28 T_MCHK */ 213 19, /* 29 T_XMMFLT */ 214 15 /* 30 T_RESERVED */ 215}; 216#define bsd_to_linux_trapcode(code) \ 217 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 218 _bsd_to_linux_trapcode[(code)]: \ 219 LINUX_T_UNKNOWN) 220 221struct linux32_ps_strings { 222 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 223 u_int ps_nargvstr; /* the number of argument strings */ 224 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 225 u_int ps_nenvstr; /* the number of environment strings */ 226}; 227 228LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 229LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 230LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 231LINUX_VDSO_SYM_CHAR(linux_platform); 232 233/* 234 * If FreeBSD & Linux have a difference of opinion about what a trap 235 * means, deal with it here. 236 * 237 * MPSAFE 238 */ 239static int 240translate_traps(int signal, int trap_code) 241{ 242 if (signal != SIGBUS) 243 return signal; 244 switch (trap_code) { 245 case T_PROTFLT: 246 case T_TSSFLT: 247 case T_DOUBLEFLT: 248 case T_PAGEFLT: 249 return SIGSEGV; 250 default: 251 return signal; 252 } 253} 254 255static int 256elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 257{ 258 Elf32_Auxargs *args; 259 Elf32_Addr *base; 260 Elf32_Addr *pos; 261 struct linux32_ps_strings *arginfo; 262 263 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 264 265 KASSERT(curthread->td_proc == imgp->proc, 266 ("unsafe elf_linux_fixup(), should be curproc")); 267 base = (Elf32_Addr *)*stack_base; 268 args = (Elf32_Auxargs *)imgp->auxargs; 269 pos = base + (imgp->args->argc + imgp->args->envc + 2); 270 271 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 272 imgp->proc->p_sysent->sv_shared_page_base); 273 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 274 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 275 276 /* 277 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 278 * as it has appeared in the 2.4.0-rc7 first time. 279 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 280 * glibc falls back to the hard-coded CLK_TCK value when aux entry 281 * is not present. 282 * Also see linux_times() implementation. 283 */ 284 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 285 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 286 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 287 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 288 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 289 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 290 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 291 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 292 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 293 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 294 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 295 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 296 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 297 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 298 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 299 if (args->execfd != -1) 300 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 301 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 302 303 free(imgp->auxargs, M_TEMP); 304 imgp->auxargs = NULL; 305 306 base--; 307 suword32(base, (uint32_t)imgp->args->argc); 308 *stack_base = (register_t *)base; 309 return (0); 310} 311 312static void 313linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 314{ 315 struct thread *td = curthread; 316 struct proc *p = td->td_proc; 317 struct sigacts *psp; 318 struct trapframe *regs; 319 struct l_rt_sigframe *fp, frame; 320 int oonstack; 321 int sig; 322 int code; 323 324 sig = ksi->ksi_signo; 325 code = ksi->ksi_code; 326 PROC_LOCK_ASSERT(p, MA_OWNED); 327 psp = p->p_sigacts; 328 mtx_assert(&psp->ps_mtx, MA_OWNED); 329 regs = td->td_frame; 330 oonstack = sigonstack(regs->tf_rsp); 331 332#ifdef DEBUG 333 if (ldebug(rt_sendsig)) 334 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 335 catcher, sig, (void*)mask, code); 336#endif 337 /* 338 * Allocate space for the signal handler context. 339 */ 340 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 341 SIGISMEMBER(psp->ps_sigonstack, sig)) { 342 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 343 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 344 } else 345 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 346 mtx_unlock(&psp->ps_mtx); 347 348 /* 349 * Build the argument list for the signal handler. 350 */ 351 if (p->p_sysent->sv_sigtbl) 352 if (sig <= p->p_sysent->sv_sigsize) 353 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 354 355 bzero(&frame, sizeof(frame)); 356 357 frame.sf_handler = PTROUT(catcher); 358 frame.sf_sig = sig; 359 frame.sf_siginfo = PTROUT(&fp->sf_si); 360 frame.sf_ucontext = PTROUT(&fp->sf_sc); 361 362 /* Fill in POSIX parts */ 363 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 364 365 /* 366 * Build the signal context to be used by sigreturn 367 * and libgcc unwind. 368 */ 369 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 370 frame.sf_sc.uc_link = 0; /* XXX ??? */ 371 372 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 373 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 374 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 375 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 376 PROC_UNLOCK(p); 377 378 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 379 380 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 381 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 382 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 383 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 384 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 385 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 386 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 387 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 388 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 389 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 390 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 391 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 392 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 393 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 394 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 395 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 396 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 397 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 398 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 399 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 400 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 401 402#ifdef DEBUG 403 if (ldebug(rt_sendsig)) 404 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 405 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 406 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 407#endif 408 409 if (copyout(&frame, fp, sizeof(frame)) != 0) { 410 /* 411 * Process has trashed its stack; give it an illegal 412 * instruction to halt it in its tracks. 413 */ 414#ifdef DEBUG 415 if (ldebug(rt_sendsig)) 416 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 417 fp, oonstack); 418#endif 419 PROC_LOCK(p); 420 sigexit(td, SIGILL); 421 } 422 423 /* 424 * Build context to run handler in. 425 */ 426 regs->tf_rsp = PTROUT(fp); 427 regs->tf_rip = linux32_rt_sigcode; 428 regs->tf_rflags &= ~(PSL_T | PSL_D); 429 regs->tf_cs = _ucode32sel; 430 regs->tf_ss = _udatasel; 431 regs->tf_ds = _udatasel; 432 regs->tf_es = _udatasel; 433 regs->tf_fs = _ufssel; 434 regs->tf_gs = _ugssel; 435 regs->tf_flags = TF_HASSEGS; 436 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 437 PROC_LOCK(p); 438 mtx_lock(&psp->ps_mtx); 439} 440 441 442/* 443 * Send an interrupt to process. 444 * 445 * Stack is set up to allow sigcode stored 446 * in u. to call routine, followed by kcall 447 * to sigreturn routine below. After sigreturn 448 * resets the signal mask, the stack, and the 449 * frame pointer, it returns to the user 450 * specified pc, psl. 451 */ 452static void 453linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 454{ 455 struct thread *td = curthread; 456 struct proc *p = td->td_proc; 457 struct sigacts *psp; 458 struct trapframe *regs; 459 struct l_sigframe *fp, frame; 460 l_sigset_t lmask; 461 int oonstack, i; 462 int sig, code; 463 464 sig = ksi->ksi_signo; 465 code = ksi->ksi_code; 466 PROC_LOCK_ASSERT(p, MA_OWNED); 467 psp = p->p_sigacts; 468 mtx_assert(&psp->ps_mtx, MA_OWNED); 469 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 470 /* Signal handler installed with SA_SIGINFO. */ 471 linux_rt_sendsig(catcher, ksi, mask); 472 return; 473 } 474 475 regs = td->td_frame; 476 oonstack = sigonstack(regs->tf_rsp); 477 478#ifdef DEBUG 479 if (ldebug(sendsig)) 480 printf(ARGS(sendsig, "%p, %d, %p, %u"), 481 catcher, sig, (void*)mask, code); 482#endif 483 484 /* 485 * Allocate space for the signal handler context. 486 */ 487 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 488 SIGISMEMBER(psp->ps_sigonstack, sig)) { 489 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 490 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 491 } else 492 fp = (struct l_sigframe *)regs->tf_rsp - 1; 493 mtx_unlock(&psp->ps_mtx); 494 PROC_UNLOCK(p); 495 496 /* 497 * Build the argument list for the signal handler. 498 */ 499 if (p->p_sysent->sv_sigtbl) 500 if (sig <= p->p_sysent->sv_sigsize) 501 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 502 503 bzero(&frame, sizeof(frame)); 504 505 frame.sf_handler = PTROUT(catcher); 506 frame.sf_sig = sig; 507 508 bsd_to_linux_sigset(mask, &lmask); 509 510 /* 511 * Build the signal context to be used by sigreturn. 512 */ 513 frame.sf_sc.sc_mask = lmask.__bits[0]; 514 frame.sf_sc.sc_gs = regs->tf_gs; 515 frame.sf_sc.sc_fs = regs->tf_fs; 516 frame.sf_sc.sc_es = regs->tf_es; 517 frame.sf_sc.sc_ds = regs->tf_ds; 518 frame.sf_sc.sc_edi = regs->tf_rdi; 519 frame.sf_sc.sc_esi = regs->tf_rsi; 520 frame.sf_sc.sc_ebp = regs->tf_rbp; 521 frame.sf_sc.sc_ebx = regs->tf_rbx; 522 frame.sf_sc.sc_esp = regs->tf_rsp; 523 frame.sf_sc.sc_edx = regs->tf_rdx; 524 frame.sf_sc.sc_ecx = regs->tf_rcx; 525 frame.sf_sc.sc_eax = regs->tf_rax; 526 frame.sf_sc.sc_eip = regs->tf_rip; 527 frame.sf_sc.sc_cs = regs->tf_cs; 528 frame.sf_sc.sc_eflags = regs->tf_rflags; 529 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 530 frame.sf_sc.sc_ss = regs->tf_ss; 531 frame.sf_sc.sc_err = regs->tf_err; 532 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 533 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 534 535 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 536 frame.sf_extramask[i] = lmask.__bits[i+1]; 537 538 if (copyout(&frame, fp, sizeof(frame)) != 0) { 539 /* 540 * Process has trashed its stack; give it an illegal 541 * instruction to halt it in its tracks. 542 */ 543 PROC_LOCK(p); 544 sigexit(td, SIGILL); 545 } 546 547 /* 548 * Build context to run handler in. 549 */ 550 regs->tf_rsp = PTROUT(fp); 551 regs->tf_rip = linux32_sigcode; 552 regs->tf_rflags &= ~(PSL_T | PSL_D); 553 regs->tf_cs = _ucode32sel; 554 regs->tf_ss = _udatasel; 555 regs->tf_ds = _udatasel; 556 regs->tf_es = _udatasel; 557 regs->tf_fs = _ufssel; 558 regs->tf_gs = _ugssel; 559 regs->tf_flags = TF_HASSEGS; 560 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 561 PROC_LOCK(p); 562 mtx_lock(&psp->ps_mtx); 563} 564 565/* 566 * System call to cleanup state after a signal 567 * has been taken. Reset signal mask and 568 * stack state from context left by sendsig (above). 569 * Return to previous pc and psl as specified by 570 * context left by sendsig. Check carefully to 571 * make sure that the user has not modified the 572 * psl to gain improper privileges or to cause 573 * a machine fault. 574 */ 575int 576linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 577{ 578 struct l_sigframe frame; 579 struct trapframe *regs; 580 sigset_t bmask; 581 l_sigset_t lmask; 582 int eflags, i; 583 ksiginfo_t ksi; 584 585 regs = td->td_frame; 586 587#ifdef DEBUG 588 if (ldebug(sigreturn)) 589 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 590#endif 591 /* 592 * The trampoline code hands us the sigframe. 593 * It is unsafe to keep track of it ourselves, in the event that a 594 * program jumps out of a signal handler. 595 */ 596 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 597 return (EFAULT); 598 599 /* 600 * Check for security violations. 601 */ 602#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 603 eflags = frame.sf_sc.sc_eflags; 604 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 605 return(EINVAL); 606 607 /* 608 * Don't allow users to load a valid privileged %cs. Let the 609 * hardware check for invalid selectors, excess privilege in 610 * other selectors, invalid %eip's and invalid %esp's. 611 */ 612#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 613 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 614 ksiginfo_init_trap(&ksi); 615 ksi.ksi_signo = SIGBUS; 616 ksi.ksi_code = BUS_OBJERR; 617 ksi.ksi_trapno = T_PROTFLT; 618 ksi.ksi_addr = (void *)regs->tf_rip; 619 trapsignal(td, &ksi); 620 return(EINVAL); 621 } 622 623 lmask.__bits[0] = frame.sf_sc.sc_mask; 624 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 625 lmask.__bits[i+1] = frame.sf_extramask[i]; 626 linux_to_bsd_sigset(&lmask, &bmask); 627 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 628 629 /* 630 * Restore signal context. 631 */ 632 regs->tf_rdi = frame.sf_sc.sc_edi; 633 regs->tf_rsi = frame.sf_sc.sc_esi; 634 regs->tf_rbp = frame.sf_sc.sc_ebp; 635 regs->tf_rbx = frame.sf_sc.sc_ebx; 636 regs->tf_rdx = frame.sf_sc.sc_edx; 637 regs->tf_rcx = frame.sf_sc.sc_ecx; 638 regs->tf_rax = frame.sf_sc.sc_eax; 639 regs->tf_rip = frame.sf_sc.sc_eip; 640 regs->tf_cs = frame.sf_sc.sc_cs; 641 regs->tf_ds = frame.sf_sc.sc_ds; 642 regs->tf_es = frame.sf_sc.sc_es; 643 regs->tf_fs = frame.sf_sc.sc_fs; 644 regs->tf_gs = frame.sf_sc.sc_gs; 645 regs->tf_rflags = eflags; 646 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 647 regs->tf_ss = frame.sf_sc.sc_ss; 648 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 649 650 return (EJUSTRETURN); 651} 652 653/* 654 * System call to cleanup state after a signal 655 * has been taken. Reset signal mask and 656 * stack state from context left by rt_sendsig (above). 657 * Return to previous pc and psl as specified by 658 * context left by sendsig. Check carefully to 659 * make sure that the user has not modified the 660 * psl to gain improper privileges or to cause 661 * a machine fault. 662 */ 663int 664linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 665{ 666 struct l_ucontext uc; 667 struct l_sigcontext *context; 668 sigset_t bmask; 669 l_stack_t *lss; 670 stack_t ss; 671 struct trapframe *regs; 672 int eflags; 673 ksiginfo_t ksi; 674 675 regs = td->td_frame; 676 677#ifdef DEBUG 678 if (ldebug(rt_sigreturn)) 679 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 680#endif 681 /* 682 * The trampoline code hands us the ucontext. 683 * It is unsafe to keep track of it ourselves, in the event that a 684 * program jumps out of a signal handler. 685 */ 686 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 687 return (EFAULT); 688 689 context = &uc.uc_mcontext; 690 691 /* 692 * Check for security violations. 693 */ 694#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 695 eflags = context->sc_eflags; 696 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 697 return(EINVAL); 698 699 /* 700 * Don't allow users to load a valid privileged %cs. Let the 701 * hardware check for invalid selectors, excess privilege in 702 * other selectors, invalid %eip's and invalid %esp's. 703 */ 704#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 705 if (!CS_SECURE(context->sc_cs)) { 706 ksiginfo_init_trap(&ksi); 707 ksi.ksi_signo = SIGBUS; 708 ksi.ksi_code = BUS_OBJERR; 709 ksi.ksi_trapno = T_PROTFLT; 710 ksi.ksi_addr = (void *)regs->tf_rip; 711 trapsignal(td, &ksi); 712 return(EINVAL); 713 } 714 715 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 716 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 717 718 /* 719 * Restore signal context 720 */ 721 regs->tf_gs = context->sc_gs; 722 regs->tf_fs = context->sc_fs; 723 regs->tf_es = context->sc_es; 724 regs->tf_ds = context->sc_ds; 725 regs->tf_rdi = context->sc_edi; 726 regs->tf_rsi = context->sc_esi; 727 regs->tf_rbp = context->sc_ebp; 728 regs->tf_rbx = context->sc_ebx; 729 regs->tf_rdx = context->sc_edx; 730 regs->tf_rcx = context->sc_ecx; 731 regs->tf_rax = context->sc_eax; 732 regs->tf_rip = context->sc_eip; 733 regs->tf_cs = context->sc_cs; 734 regs->tf_rflags = eflags; 735 regs->tf_rsp = context->sc_esp_at_signal; 736 regs->tf_ss = context->sc_ss; 737 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 738 739 /* 740 * call sigaltstack & ignore results.. 741 */ 742 lss = &uc.uc_stack; 743 ss.ss_sp = PTRIN(lss->ss_sp); 744 ss.ss_size = lss->ss_size; 745 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 746 747#ifdef DEBUG 748 if (ldebug(rt_sigreturn)) 749 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 750 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 751#endif 752 (void)kern_sigaltstack(td, &ss, NULL); 753 754 return (EJUSTRETURN); 755} 756 757static int 758linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 759{ 760 struct proc *p; 761 struct trapframe *frame; 762 763 p = td->td_proc; 764 frame = td->td_frame; 765 766 sa->args[0] = frame->tf_rbx; 767 sa->args[1] = frame->tf_rcx; 768 sa->args[2] = frame->tf_rdx; 769 sa->args[3] = frame->tf_rsi; 770 sa->args[4] = frame->tf_rdi; 771 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 772 sa->code = frame->tf_rax; 773 774 if (sa->code >= p->p_sysent->sv_size) 775 sa->callp = &p->p_sysent->sv_table[0]; 776 else 777 sa->callp = &p->p_sysent->sv_table[sa->code]; 778 sa->narg = sa->callp->sy_narg; 779 780 td->td_retval[0] = 0; 781 td->td_retval[1] = frame->tf_rdx; 782 783 return (0); 784} 785 786/* 787 * If a linux binary is exec'ing something, try this image activator 788 * first. We override standard shell script execution in order to 789 * be able to modify the interpreter path. We only do this if a linux 790 * binary is doing the exec, so we do not create an EXEC module for it. 791 */ 792static int exec_linux_imgact_try(struct image_params *iparams); 793 794static int 795exec_linux_imgact_try(struct image_params *imgp) 796{ 797 const char *head = (const char *)imgp->image_header; 798 char *rpath; 799 int error = -1; 800 801 /* 802 * The interpreter for shell scripts run from a linux binary needs 803 * to be located in /compat/linux if possible in order to recursively 804 * maintain linux path emulation. 805 */ 806 if (((const short *)head)[0] == SHELLMAGIC) { 807 /* 808 * Run our normal shell image activator. If it succeeds attempt 809 * to use the alternate path for the interpreter. If an 810 * alternate * path is found, use our stringspace to store it. 811 */ 812 if ((error = exec_shell_imgact(imgp)) == 0) { 813 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 814 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 815 AT_FDCWD); 816 if (rpath != NULL) 817 imgp->args->fname_buf = 818 imgp->interpreter_name = rpath; 819 } 820 } 821 return (error); 822} 823 824/* 825 * Clear registers on exec 826 * XXX copied from ia32_signal.c. 827 */ 828static void 829exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 830{ 831 struct trapframe *regs = td->td_frame; 832 struct pcb *pcb = td->td_pcb; 833 834 mtx_lock(&dt_lock); 835 if (td->td_proc->p_md.md_ldt != NULL) 836 user_ldt_free(td); 837 else 838 mtx_unlock(&dt_lock); 839 840 critical_enter(); 841 wrmsr(MSR_FSBASE, 0); 842 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 843 pcb->pcb_fsbase = 0; 844 pcb->pcb_gsbase = 0; 845 critical_exit(); 846 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 847 848 bzero((char *)regs, sizeof(struct trapframe)); 849 regs->tf_rip = imgp->entry_addr; 850 regs->tf_rsp = stack; 851 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 852 regs->tf_gs = _ugssel; 853 regs->tf_fs = _ufssel; 854 regs->tf_es = _udatasel; 855 regs->tf_ds = _udatasel; 856 regs->tf_ss = _udatasel; 857 regs->tf_flags = TF_HASSEGS; 858 regs->tf_cs = _ucode32sel; 859 regs->tf_rbx = imgp->ps_strings; 860 861 fpstate_drop(td); 862 863 /* Do full restore on return so that we can change to a different %cs */ 864 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 865 td->td_retval[1] = 0; 866} 867 868/* 869 * XXX copied from ia32_sysvec.c. 870 */ 871static register_t * 872linux_copyout_strings(struct image_params *imgp) 873{ 874 int argc, envc; 875 u_int32_t *vectp; 876 char *stringp, *destp; 877 u_int32_t *stack_base; 878 struct linux32_ps_strings *arginfo; 879 880 /* 881 * Calculate string base and vector table pointers. 882 */ 883 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 884 destp = (caddr_t)arginfo - SPARE_USRSPACE - 885 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 886 887 /* 888 * If we have a valid auxargs ptr, prepare some room 889 * on the stack. 890 */ 891 if (imgp->auxargs) { 892 /* 893 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 894 * lower compatibility. 895 */ 896 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 897 (LINUX_AT_COUNT * 2); 898 /* 899 * The '+ 2' is for the null pointers at the end of each of 900 * the arg and env vector sets,and imgp->auxarg_size is room 901 * for argument of Runtime loader. 902 */ 903 vectp = (u_int32_t *) (destp - (imgp->args->argc + 904 imgp->args->envc + 2 + imgp->auxarg_size) * 905 sizeof(u_int32_t)); 906 907 } else 908 /* 909 * The '+ 2' is for the null pointers at the end of each of 910 * the arg and env vector sets 911 */ 912 vectp = (u_int32_t *)(destp - (imgp->args->argc + 913 imgp->args->envc + 2) * sizeof(u_int32_t)); 914 915 /* 916 * vectp also becomes our initial stack base 917 */ 918 stack_base = vectp; 919 920 stringp = imgp->args->begin_argv; 921 argc = imgp->args->argc; 922 envc = imgp->args->envc; 923 /* 924 * Copy out strings - arguments and environment. 925 */ 926 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 927 928 /* 929 * Fill in "ps_strings" struct for ps, w, etc. 930 */ 931 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 932 suword32(&arginfo->ps_nargvstr, argc); 933 934 /* 935 * Fill in argument portion of vector table. 936 */ 937 for (; argc > 0; --argc) { 938 suword32(vectp++, (uint32_t)(intptr_t)destp); 939 while (*stringp++ != 0) 940 destp++; 941 destp++; 942 } 943 944 /* a null vector table pointer separates the argp's from the envp's */ 945 suword32(vectp++, 0); 946 947 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 948 suword32(&arginfo->ps_nenvstr, envc); 949 950 /* 951 * Fill in environment portion of vector table. 952 */ 953 for (; envc > 0; --envc) { 954 suword32(vectp++, (uint32_t)(intptr_t)destp); 955 while (*stringp++ != 0) 956 destp++; 957 destp++; 958 } 959 960 /* end of vector table is a null pointer */ 961 suword32(vectp, 0); 962 963 return ((register_t *)stack_base); 964} 965 966static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 967 "32-bit Linux emulation"); 968 969static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 970SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 971 &linux32_maxdsiz, 0, ""); 972static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 973SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 974 &linux32_maxssiz, 0, ""); 975static u_long linux32_maxvmem = LINUX32_MAXVMEM; 976SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 977 &linux32_maxvmem, 0, ""); 978
| 121 122static int elf_linux_fixup(register_t **stack_base, 123 struct image_params *iparams); 124static register_t *linux_copyout_strings(struct image_params *imgp); 125static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 126static void exec_linux_setregs(struct thread *td, 127 struct image_params *imgp, u_long stack); 128static void linux32_fixlimit(struct rlimit *rl, int which); 129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 130static void linux_vdso_install(void *param); 131static void linux_vdso_deinstall(void *param); 132 133static eventhandler_tag linux_exit_tag; 134static eventhandler_tag linux_exec_tag; 135static eventhandler_tag linux_thread_dtor_tag; 136 137/* 138 * Linux syscalls return negative errno's, we do positive and map them 139 * Reference: 140 * FreeBSD: src/sys/sys/errno.h 141 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 142 * linux-2.6.17.8/include/asm-generic/errno.h 143 */ 144static int bsd_to_linux_errno[ELAST + 1] = { 145 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 146 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 147 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 148 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 149 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 150 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 151 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 152 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 153 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 154 -72, -67, -71 155}; 156 157int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 158 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 159 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 160 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 161 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 162 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 163 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 164 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 165 0, LINUX_SIGUSR1, LINUX_SIGUSR2 166}; 167 168int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 169 SIGHUP, SIGINT, SIGQUIT, SIGILL, 170 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 171 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 172 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 173 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 174 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 175 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 176 SIGIO, SIGURG, SIGSYS 177}; 178 179#define LINUX_T_UNKNOWN 255 180static int _bsd_to_linux_trapcode[] = { 181 LINUX_T_UNKNOWN, /* 0 */ 182 6, /* 1 T_PRIVINFLT */ 183 LINUX_T_UNKNOWN, /* 2 */ 184 3, /* 3 T_BPTFLT */ 185 LINUX_T_UNKNOWN, /* 4 */ 186 LINUX_T_UNKNOWN, /* 5 */ 187 16, /* 6 T_ARITHTRAP */ 188 254, /* 7 T_ASTFLT */ 189 LINUX_T_UNKNOWN, /* 8 */ 190 13, /* 9 T_PROTFLT */ 191 1, /* 10 T_TRCTRAP */ 192 LINUX_T_UNKNOWN, /* 11 */ 193 14, /* 12 T_PAGEFLT */ 194 LINUX_T_UNKNOWN, /* 13 */ 195 17, /* 14 T_ALIGNFLT */ 196 LINUX_T_UNKNOWN, /* 15 */ 197 LINUX_T_UNKNOWN, /* 16 */ 198 LINUX_T_UNKNOWN, /* 17 */ 199 0, /* 18 T_DIVIDE */ 200 2, /* 19 T_NMI */ 201 4, /* 20 T_OFLOW */ 202 5, /* 21 T_BOUND */ 203 7, /* 22 T_DNA */ 204 8, /* 23 T_DOUBLEFLT */ 205 9, /* 24 T_FPOPFLT */ 206 10, /* 25 T_TSSFLT */ 207 11, /* 26 T_SEGNPFLT */ 208 12, /* 27 T_STKFLT */ 209 18, /* 28 T_MCHK */ 210 19, /* 29 T_XMMFLT */ 211 15 /* 30 T_RESERVED */ 212}; 213#define bsd_to_linux_trapcode(code) \ 214 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 215 _bsd_to_linux_trapcode[(code)]: \ 216 LINUX_T_UNKNOWN) 217 218struct linux32_ps_strings { 219 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 220 u_int ps_nargvstr; /* the number of argument strings */ 221 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 222 u_int ps_nenvstr; /* the number of environment strings */ 223}; 224 225LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 226LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 227LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 228LINUX_VDSO_SYM_CHAR(linux_platform); 229 230/* 231 * If FreeBSD & Linux have a difference of opinion about what a trap 232 * means, deal with it here. 233 * 234 * MPSAFE 235 */ 236static int 237translate_traps(int signal, int trap_code) 238{ 239 if (signal != SIGBUS) 240 return signal; 241 switch (trap_code) { 242 case T_PROTFLT: 243 case T_TSSFLT: 244 case T_DOUBLEFLT: 245 case T_PAGEFLT: 246 return SIGSEGV; 247 default: 248 return signal; 249 } 250} 251 252static int 253elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 254{ 255 Elf32_Auxargs *args; 256 Elf32_Addr *base; 257 Elf32_Addr *pos; 258 struct linux32_ps_strings *arginfo; 259 260 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 261 262 KASSERT(curthread->td_proc == imgp->proc, 263 ("unsafe elf_linux_fixup(), should be curproc")); 264 base = (Elf32_Addr *)*stack_base; 265 args = (Elf32_Auxargs *)imgp->auxargs; 266 pos = base + (imgp->args->argc + imgp->args->envc + 2); 267 268 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 269 imgp->proc->p_sysent->sv_shared_page_base); 270 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 271 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 272 273 /* 274 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 275 * as it has appeared in the 2.4.0-rc7 first time. 276 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 277 * glibc falls back to the hard-coded CLK_TCK value when aux entry 278 * is not present. 279 * Also see linux_times() implementation. 280 */ 281 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 282 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 283 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 284 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 285 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 286 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 287 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 288 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 289 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 290 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 291 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 292 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 293 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 294 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 295 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 296 if (args->execfd != -1) 297 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 298 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 299 300 free(imgp->auxargs, M_TEMP); 301 imgp->auxargs = NULL; 302 303 base--; 304 suword32(base, (uint32_t)imgp->args->argc); 305 *stack_base = (register_t *)base; 306 return (0); 307} 308 309static void 310linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 311{ 312 struct thread *td = curthread; 313 struct proc *p = td->td_proc; 314 struct sigacts *psp; 315 struct trapframe *regs; 316 struct l_rt_sigframe *fp, frame; 317 int oonstack; 318 int sig; 319 int code; 320 321 sig = ksi->ksi_signo; 322 code = ksi->ksi_code; 323 PROC_LOCK_ASSERT(p, MA_OWNED); 324 psp = p->p_sigacts; 325 mtx_assert(&psp->ps_mtx, MA_OWNED); 326 regs = td->td_frame; 327 oonstack = sigonstack(regs->tf_rsp); 328 329#ifdef DEBUG 330 if (ldebug(rt_sendsig)) 331 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 332 catcher, sig, (void*)mask, code); 333#endif 334 /* 335 * Allocate space for the signal handler context. 336 */ 337 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 338 SIGISMEMBER(psp->ps_sigonstack, sig)) { 339 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 340 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 341 } else 342 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 343 mtx_unlock(&psp->ps_mtx); 344 345 /* 346 * Build the argument list for the signal handler. 347 */ 348 if (p->p_sysent->sv_sigtbl) 349 if (sig <= p->p_sysent->sv_sigsize) 350 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 351 352 bzero(&frame, sizeof(frame)); 353 354 frame.sf_handler = PTROUT(catcher); 355 frame.sf_sig = sig; 356 frame.sf_siginfo = PTROUT(&fp->sf_si); 357 frame.sf_ucontext = PTROUT(&fp->sf_sc); 358 359 /* Fill in POSIX parts */ 360 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 361 362 /* 363 * Build the signal context to be used by sigreturn 364 * and libgcc unwind. 365 */ 366 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 367 frame.sf_sc.uc_link = 0; /* XXX ??? */ 368 369 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 370 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 371 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 372 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 373 PROC_UNLOCK(p); 374 375 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 376 377 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 378 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 379 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 380 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 381 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 382 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 383 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 384 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 385 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 386 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 387 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 388 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 389 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 390 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 391 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 392 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 393 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 394 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 395 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 396 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 397 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 398 399#ifdef DEBUG 400 if (ldebug(rt_sendsig)) 401 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 402 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 403 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 404#endif 405 406 if (copyout(&frame, fp, sizeof(frame)) != 0) { 407 /* 408 * Process has trashed its stack; give it an illegal 409 * instruction to halt it in its tracks. 410 */ 411#ifdef DEBUG 412 if (ldebug(rt_sendsig)) 413 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 414 fp, oonstack); 415#endif 416 PROC_LOCK(p); 417 sigexit(td, SIGILL); 418 } 419 420 /* 421 * Build context to run handler in. 422 */ 423 regs->tf_rsp = PTROUT(fp); 424 regs->tf_rip = linux32_rt_sigcode; 425 regs->tf_rflags &= ~(PSL_T | PSL_D); 426 regs->tf_cs = _ucode32sel; 427 regs->tf_ss = _udatasel; 428 regs->tf_ds = _udatasel; 429 regs->tf_es = _udatasel; 430 regs->tf_fs = _ufssel; 431 regs->tf_gs = _ugssel; 432 regs->tf_flags = TF_HASSEGS; 433 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 434 PROC_LOCK(p); 435 mtx_lock(&psp->ps_mtx); 436} 437 438 439/* 440 * Send an interrupt to process. 441 * 442 * Stack is set up to allow sigcode stored 443 * in u. to call routine, followed by kcall 444 * to sigreturn routine below. After sigreturn 445 * resets the signal mask, the stack, and the 446 * frame pointer, it returns to the user 447 * specified pc, psl. 448 */ 449static void 450linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 451{ 452 struct thread *td = curthread; 453 struct proc *p = td->td_proc; 454 struct sigacts *psp; 455 struct trapframe *regs; 456 struct l_sigframe *fp, frame; 457 l_sigset_t lmask; 458 int oonstack, i; 459 int sig, code; 460 461 sig = ksi->ksi_signo; 462 code = ksi->ksi_code; 463 PROC_LOCK_ASSERT(p, MA_OWNED); 464 psp = p->p_sigacts; 465 mtx_assert(&psp->ps_mtx, MA_OWNED); 466 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 467 /* Signal handler installed with SA_SIGINFO. */ 468 linux_rt_sendsig(catcher, ksi, mask); 469 return; 470 } 471 472 regs = td->td_frame; 473 oonstack = sigonstack(regs->tf_rsp); 474 475#ifdef DEBUG 476 if (ldebug(sendsig)) 477 printf(ARGS(sendsig, "%p, %d, %p, %u"), 478 catcher, sig, (void*)mask, code); 479#endif 480 481 /* 482 * Allocate space for the signal handler context. 483 */ 484 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 485 SIGISMEMBER(psp->ps_sigonstack, sig)) { 486 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 487 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 488 } else 489 fp = (struct l_sigframe *)regs->tf_rsp - 1; 490 mtx_unlock(&psp->ps_mtx); 491 PROC_UNLOCK(p); 492 493 /* 494 * Build the argument list for the signal handler. 495 */ 496 if (p->p_sysent->sv_sigtbl) 497 if (sig <= p->p_sysent->sv_sigsize) 498 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 499 500 bzero(&frame, sizeof(frame)); 501 502 frame.sf_handler = PTROUT(catcher); 503 frame.sf_sig = sig; 504 505 bsd_to_linux_sigset(mask, &lmask); 506 507 /* 508 * Build the signal context to be used by sigreturn. 509 */ 510 frame.sf_sc.sc_mask = lmask.__bits[0]; 511 frame.sf_sc.sc_gs = regs->tf_gs; 512 frame.sf_sc.sc_fs = regs->tf_fs; 513 frame.sf_sc.sc_es = regs->tf_es; 514 frame.sf_sc.sc_ds = regs->tf_ds; 515 frame.sf_sc.sc_edi = regs->tf_rdi; 516 frame.sf_sc.sc_esi = regs->tf_rsi; 517 frame.sf_sc.sc_ebp = regs->tf_rbp; 518 frame.sf_sc.sc_ebx = regs->tf_rbx; 519 frame.sf_sc.sc_esp = regs->tf_rsp; 520 frame.sf_sc.sc_edx = regs->tf_rdx; 521 frame.sf_sc.sc_ecx = regs->tf_rcx; 522 frame.sf_sc.sc_eax = regs->tf_rax; 523 frame.sf_sc.sc_eip = regs->tf_rip; 524 frame.sf_sc.sc_cs = regs->tf_cs; 525 frame.sf_sc.sc_eflags = regs->tf_rflags; 526 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 527 frame.sf_sc.sc_ss = regs->tf_ss; 528 frame.sf_sc.sc_err = regs->tf_err; 529 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 530 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 531 532 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 533 frame.sf_extramask[i] = lmask.__bits[i+1]; 534 535 if (copyout(&frame, fp, sizeof(frame)) != 0) { 536 /* 537 * Process has trashed its stack; give it an illegal 538 * instruction to halt it in its tracks. 539 */ 540 PROC_LOCK(p); 541 sigexit(td, SIGILL); 542 } 543 544 /* 545 * Build context to run handler in. 546 */ 547 regs->tf_rsp = PTROUT(fp); 548 regs->tf_rip = linux32_sigcode; 549 regs->tf_rflags &= ~(PSL_T | PSL_D); 550 regs->tf_cs = _ucode32sel; 551 regs->tf_ss = _udatasel; 552 regs->tf_ds = _udatasel; 553 regs->tf_es = _udatasel; 554 regs->tf_fs = _ufssel; 555 regs->tf_gs = _ugssel; 556 regs->tf_flags = TF_HASSEGS; 557 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 558 PROC_LOCK(p); 559 mtx_lock(&psp->ps_mtx); 560} 561 562/* 563 * System call to cleanup state after a signal 564 * has been taken. Reset signal mask and 565 * stack state from context left by sendsig (above). 566 * Return to previous pc and psl as specified by 567 * context left by sendsig. Check carefully to 568 * make sure that the user has not modified the 569 * psl to gain improper privileges or to cause 570 * a machine fault. 571 */ 572int 573linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 574{ 575 struct l_sigframe frame; 576 struct trapframe *regs; 577 sigset_t bmask; 578 l_sigset_t lmask; 579 int eflags, i; 580 ksiginfo_t ksi; 581 582 regs = td->td_frame; 583 584#ifdef DEBUG 585 if (ldebug(sigreturn)) 586 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 587#endif 588 /* 589 * The trampoline code hands us the sigframe. 590 * It is unsafe to keep track of it ourselves, in the event that a 591 * program jumps out of a signal handler. 592 */ 593 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 594 return (EFAULT); 595 596 /* 597 * Check for security violations. 598 */ 599#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 600 eflags = frame.sf_sc.sc_eflags; 601 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 602 return(EINVAL); 603 604 /* 605 * Don't allow users to load a valid privileged %cs. Let the 606 * hardware check for invalid selectors, excess privilege in 607 * other selectors, invalid %eip's and invalid %esp's. 608 */ 609#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 610 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 611 ksiginfo_init_trap(&ksi); 612 ksi.ksi_signo = SIGBUS; 613 ksi.ksi_code = BUS_OBJERR; 614 ksi.ksi_trapno = T_PROTFLT; 615 ksi.ksi_addr = (void *)regs->tf_rip; 616 trapsignal(td, &ksi); 617 return(EINVAL); 618 } 619 620 lmask.__bits[0] = frame.sf_sc.sc_mask; 621 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 622 lmask.__bits[i+1] = frame.sf_extramask[i]; 623 linux_to_bsd_sigset(&lmask, &bmask); 624 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 625 626 /* 627 * Restore signal context. 628 */ 629 regs->tf_rdi = frame.sf_sc.sc_edi; 630 regs->tf_rsi = frame.sf_sc.sc_esi; 631 regs->tf_rbp = frame.sf_sc.sc_ebp; 632 regs->tf_rbx = frame.sf_sc.sc_ebx; 633 regs->tf_rdx = frame.sf_sc.sc_edx; 634 regs->tf_rcx = frame.sf_sc.sc_ecx; 635 regs->tf_rax = frame.sf_sc.sc_eax; 636 regs->tf_rip = frame.sf_sc.sc_eip; 637 regs->tf_cs = frame.sf_sc.sc_cs; 638 regs->tf_ds = frame.sf_sc.sc_ds; 639 regs->tf_es = frame.sf_sc.sc_es; 640 regs->tf_fs = frame.sf_sc.sc_fs; 641 regs->tf_gs = frame.sf_sc.sc_gs; 642 regs->tf_rflags = eflags; 643 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 644 regs->tf_ss = frame.sf_sc.sc_ss; 645 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 646 647 return (EJUSTRETURN); 648} 649 650/* 651 * System call to cleanup state after a signal 652 * has been taken. Reset signal mask and 653 * stack state from context left by rt_sendsig (above). 654 * Return to previous pc and psl as specified by 655 * context left by sendsig. Check carefully to 656 * make sure that the user has not modified the 657 * psl to gain improper privileges or to cause 658 * a machine fault. 659 */ 660int 661linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 662{ 663 struct l_ucontext uc; 664 struct l_sigcontext *context; 665 sigset_t bmask; 666 l_stack_t *lss; 667 stack_t ss; 668 struct trapframe *regs; 669 int eflags; 670 ksiginfo_t ksi; 671 672 regs = td->td_frame; 673 674#ifdef DEBUG 675 if (ldebug(rt_sigreturn)) 676 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 677#endif 678 /* 679 * The trampoline code hands us the ucontext. 680 * It is unsafe to keep track of it ourselves, in the event that a 681 * program jumps out of a signal handler. 682 */ 683 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 684 return (EFAULT); 685 686 context = &uc.uc_mcontext; 687 688 /* 689 * Check for security violations. 690 */ 691#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 692 eflags = context->sc_eflags; 693 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 694 return(EINVAL); 695 696 /* 697 * Don't allow users to load a valid privileged %cs. Let the 698 * hardware check for invalid selectors, excess privilege in 699 * other selectors, invalid %eip's and invalid %esp's. 700 */ 701#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 702 if (!CS_SECURE(context->sc_cs)) { 703 ksiginfo_init_trap(&ksi); 704 ksi.ksi_signo = SIGBUS; 705 ksi.ksi_code = BUS_OBJERR; 706 ksi.ksi_trapno = T_PROTFLT; 707 ksi.ksi_addr = (void *)regs->tf_rip; 708 trapsignal(td, &ksi); 709 return(EINVAL); 710 } 711 712 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 713 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 714 715 /* 716 * Restore signal context 717 */ 718 regs->tf_gs = context->sc_gs; 719 regs->tf_fs = context->sc_fs; 720 regs->tf_es = context->sc_es; 721 regs->tf_ds = context->sc_ds; 722 regs->tf_rdi = context->sc_edi; 723 regs->tf_rsi = context->sc_esi; 724 regs->tf_rbp = context->sc_ebp; 725 regs->tf_rbx = context->sc_ebx; 726 regs->tf_rdx = context->sc_edx; 727 regs->tf_rcx = context->sc_ecx; 728 regs->tf_rax = context->sc_eax; 729 regs->tf_rip = context->sc_eip; 730 regs->tf_cs = context->sc_cs; 731 regs->tf_rflags = eflags; 732 regs->tf_rsp = context->sc_esp_at_signal; 733 regs->tf_ss = context->sc_ss; 734 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 735 736 /* 737 * call sigaltstack & ignore results.. 738 */ 739 lss = &uc.uc_stack; 740 ss.ss_sp = PTRIN(lss->ss_sp); 741 ss.ss_size = lss->ss_size; 742 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 743 744#ifdef DEBUG 745 if (ldebug(rt_sigreturn)) 746 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 747 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 748#endif 749 (void)kern_sigaltstack(td, &ss, NULL); 750 751 return (EJUSTRETURN); 752} 753 754static int 755linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 756{ 757 struct proc *p; 758 struct trapframe *frame; 759 760 p = td->td_proc; 761 frame = td->td_frame; 762 763 sa->args[0] = frame->tf_rbx; 764 sa->args[1] = frame->tf_rcx; 765 sa->args[2] = frame->tf_rdx; 766 sa->args[3] = frame->tf_rsi; 767 sa->args[4] = frame->tf_rdi; 768 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 769 sa->code = frame->tf_rax; 770 771 if (sa->code >= p->p_sysent->sv_size) 772 sa->callp = &p->p_sysent->sv_table[0]; 773 else 774 sa->callp = &p->p_sysent->sv_table[sa->code]; 775 sa->narg = sa->callp->sy_narg; 776 777 td->td_retval[0] = 0; 778 td->td_retval[1] = frame->tf_rdx; 779 780 return (0); 781} 782 783/* 784 * If a linux binary is exec'ing something, try this image activator 785 * first. We override standard shell script execution in order to 786 * be able to modify the interpreter path. We only do this if a linux 787 * binary is doing the exec, so we do not create an EXEC module for it. 788 */ 789static int exec_linux_imgact_try(struct image_params *iparams); 790 791static int 792exec_linux_imgact_try(struct image_params *imgp) 793{ 794 const char *head = (const char *)imgp->image_header; 795 char *rpath; 796 int error = -1; 797 798 /* 799 * The interpreter for shell scripts run from a linux binary needs 800 * to be located in /compat/linux if possible in order to recursively 801 * maintain linux path emulation. 802 */ 803 if (((const short *)head)[0] == SHELLMAGIC) { 804 /* 805 * Run our normal shell image activator. If it succeeds attempt 806 * to use the alternate path for the interpreter. If an 807 * alternate * path is found, use our stringspace to store it. 808 */ 809 if ((error = exec_shell_imgact(imgp)) == 0) { 810 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 811 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 812 AT_FDCWD); 813 if (rpath != NULL) 814 imgp->args->fname_buf = 815 imgp->interpreter_name = rpath; 816 } 817 } 818 return (error); 819} 820 821/* 822 * Clear registers on exec 823 * XXX copied from ia32_signal.c. 824 */ 825static void 826exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 827{ 828 struct trapframe *regs = td->td_frame; 829 struct pcb *pcb = td->td_pcb; 830 831 mtx_lock(&dt_lock); 832 if (td->td_proc->p_md.md_ldt != NULL) 833 user_ldt_free(td); 834 else 835 mtx_unlock(&dt_lock); 836 837 critical_enter(); 838 wrmsr(MSR_FSBASE, 0); 839 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 840 pcb->pcb_fsbase = 0; 841 pcb->pcb_gsbase = 0; 842 critical_exit(); 843 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 844 845 bzero((char *)regs, sizeof(struct trapframe)); 846 regs->tf_rip = imgp->entry_addr; 847 regs->tf_rsp = stack; 848 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 849 regs->tf_gs = _ugssel; 850 regs->tf_fs = _ufssel; 851 regs->tf_es = _udatasel; 852 regs->tf_ds = _udatasel; 853 regs->tf_ss = _udatasel; 854 regs->tf_flags = TF_HASSEGS; 855 regs->tf_cs = _ucode32sel; 856 regs->tf_rbx = imgp->ps_strings; 857 858 fpstate_drop(td); 859 860 /* Do full restore on return so that we can change to a different %cs */ 861 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 862 td->td_retval[1] = 0; 863} 864 865/* 866 * XXX copied from ia32_sysvec.c. 867 */ 868static register_t * 869linux_copyout_strings(struct image_params *imgp) 870{ 871 int argc, envc; 872 u_int32_t *vectp; 873 char *stringp, *destp; 874 u_int32_t *stack_base; 875 struct linux32_ps_strings *arginfo; 876 877 /* 878 * Calculate string base and vector table pointers. 879 */ 880 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 881 destp = (caddr_t)arginfo - SPARE_USRSPACE - 882 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 883 884 /* 885 * If we have a valid auxargs ptr, prepare some room 886 * on the stack. 887 */ 888 if (imgp->auxargs) { 889 /* 890 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 891 * lower compatibility. 892 */ 893 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 894 (LINUX_AT_COUNT * 2); 895 /* 896 * The '+ 2' is for the null pointers at the end of each of 897 * the arg and env vector sets,and imgp->auxarg_size is room 898 * for argument of Runtime loader. 899 */ 900 vectp = (u_int32_t *) (destp - (imgp->args->argc + 901 imgp->args->envc + 2 + imgp->auxarg_size) * 902 sizeof(u_int32_t)); 903 904 } else 905 /* 906 * The '+ 2' is for the null pointers at the end of each of 907 * the arg and env vector sets 908 */ 909 vectp = (u_int32_t *)(destp - (imgp->args->argc + 910 imgp->args->envc + 2) * sizeof(u_int32_t)); 911 912 /* 913 * vectp also becomes our initial stack base 914 */ 915 stack_base = vectp; 916 917 stringp = imgp->args->begin_argv; 918 argc = imgp->args->argc; 919 envc = imgp->args->envc; 920 /* 921 * Copy out strings - arguments and environment. 922 */ 923 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 924 925 /* 926 * Fill in "ps_strings" struct for ps, w, etc. 927 */ 928 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 929 suword32(&arginfo->ps_nargvstr, argc); 930 931 /* 932 * Fill in argument portion of vector table. 933 */ 934 for (; argc > 0; --argc) { 935 suword32(vectp++, (uint32_t)(intptr_t)destp); 936 while (*stringp++ != 0) 937 destp++; 938 destp++; 939 } 940 941 /* a null vector table pointer separates the argp's from the envp's */ 942 suword32(vectp++, 0); 943 944 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 945 suword32(&arginfo->ps_nenvstr, envc); 946 947 /* 948 * Fill in environment portion of vector table. 949 */ 950 for (; envc > 0; --envc) { 951 suword32(vectp++, (uint32_t)(intptr_t)destp); 952 while (*stringp++ != 0) 953 destp++; 954 destp++; 955 } 956 957 /* end of vector table is a null pointer */ 958 suword32(vectp, 0); 959 960 return ((register_t *)stack_base); 961} 962 963static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 964 "32-bit Linux emulation"); 965 966static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 967SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 968 &linux32_maxdsiz, 0, ""); 969static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 970SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 971 &linux32_maxssiz, 0, ""); 972static u_long linux32_maxvmem = LINUX32_MAXVMEM; 973SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 974 &linux32_maxvmem, 0, ""); 975
|
979static void 980linux32_fixlimit(struct rlimit *rl, int which) 981{ 982 983 switch (which) { 984 case RLIMIT_DATA: 985 if (linux32_maxdsiz != 0) { 986 if (rl->rlim_cur > linux32_maxdsiz) 987 rl->rlim_cur = linux32_maxdsiz; 988 if (rl->rlim_max > linux32_maxdsiz) 989 rl->rlim_max = linux32_maxdsiz; 990 } 991 break; 992 case RLIMIT_STACK: 993 if (linux32_maxssiz != 0) { 994 if (rl->rlim_cur > linux32_maxssiz) 995 rl->rlim_cur = linux32_maxssiz; 996 if (rl->rlim_max > linux32_maxssiz) 997 rl->rlim_max = linux32_maxssiz; 998 } 999 break; 1000 case RLIMIT_VMEM: 1001 if (linux32_maxvmem != 0) { 1002 if (rl->rlim_cur > linux32_maxvmem) 1003 rl->rlim_cur = linux32_maxvmem; 1004 if (rl->rlim_max > linux32_maxvmem) 1005 rl->rlim_max = linux32_maxvmem; 1006 } 1007 break; 1008 } 1009} 1010 1011struct sysentvec elf_linux_sysvec = { 1012 .sv_size = LINUX_SYS_MAXSYSCALL, 1013 .sv_table = linux_sysent, 1014 .sv_mask = 0, 1015 .sv_sigsize = LINUX_SIGTBLSZ, 1016 .sv_sigtbl = bsd_to_linux_signal, 1017 .sv_errsize = ELAST + 1, 1018 .sv_errtbl = bsd_to_linux_errno, 1019 .sv_transtrap = translate_traps, 1020 .sv_fixup = elf_linux_fixup, 1021 .sv_sendsig = linux_sendsig, 1022 .sv_sigcode = &_binary_linux32_locore_o_start, 1023 .sv_szsigcode = &linux_szsigcode, 1024 .sv_prepsyscall = NULL, 1025 .sv_name = "Linux ELF32", 1026 .sv_coredump = elf32_coredump, 1027 .sv_imgact_try = exec_linux_imgact_try, 1028 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1029 .sv_pagesize = PAGE_SIZE, 1030 .sv_minuser = VM_MIN_ADDRESS, 1031 .sv_maxuser = LINUX32_MAXUSER, 1032 .sv_usrstack = LINUX32_USRSTACK, 1033 .sv_psstrings = LINUX32_PS_STRINGS, 1034 .sv_stackprot = VM_PROT_ALL, 1035 .sv_copyout_strings = linux_copyout_strings, 1036 .sv_setregs = exec_linux_setregs, 1037 .sv_fixlimit = linux32_fixlimit, 1038 .sv_maxssiz = &linux32_maxssiz, 1039 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1040 .sv_set_syscall_retval = cpu_set_syscall_retval, 1041 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1042 .sv_syscallnames = NULL, 1043 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1044 .sv_shared_page_len = PAGE_SIZE, 1045 .sv_schedtail = linux_schedtail, 1046 .sv_thread_detach = linux_thread_detach, 1047}; 1048 1049static void 1050linux_vdso_install(void *param) 1051{ 1052 1053 linux_szsigcode = (&_binary_linux32_locore_o_end - 1054 &_binary_linux32_locore_o_start); 1055 1056 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1057 panic("Linux invalid vdso size\n"); 1058 1059 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1060 1061 linux_shared_page_obj = __elfN(linux_shared_page_init) 1062 (&linux_shared_page_mapping); 1063 1064 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); 1065 1066 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1067 linux_szsigcode); 1068 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1069 1070 linux_kplatform = linux_shared_page_mapping + 1071 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE); 1072} 1073SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1074 (sysinit_cfunc_t)linux_vdso_install, NULL); 1075 1076static void 1077linux_vdso_deinstall(void *param) 1078{ 1079 1080 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1081}; 1082SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1083 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1084 1085static char GNU_ABI_VENDOR[] = "GNU"; 1086static int GNULINUX_ABI_DESC = 0; 1087 1088static boolean_t 1089linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1090{ 1091 const Elf32_Word *desc; 1092 uintptr_t p; 1093 1094 p = (uintptr_t)(note + 1); 1095 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1096 1097 desc = (const Elf32_Word *)p; 1098 if (desc[0] != GNULINUX_ABI_DESC) 1099 return (FALSE); 1100 1101 /* 1102 * For linux we encode osrel as follows (see linux_mib.c): 1103 * VVVMMMIII (version, major, minor), see linux_mib.c. 1104 */ 1105 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1106 1107 return (TRUE); 1108} 1109 1110static Elf_Brandnote linux32_brandnote = { 1111 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1112 .hdr.n_descsz = 16, /* XXX at least 16 */ 1113 .hdr.n_type = 1, 1114 .vendor = GNU_ABI_VENDOR, 1115 .flags = BN_TRANSLATE_OSREL, 1116 .trans_osrel = linux32_trans_osrel 1117}; 1118 1119static Elf32_Brandinfo linux_brand = { 1120 .brand = ELFOSABI_LINUX, 1121 .machine = EM_386, 1122 .compat_3_brand = "Linux", 1123 .emul_path = "/compat/linux", 1124 .interp_path = "/lib/ld-linux.so.1", 1125 .sysvec = &elf_linux_sysvec, 1126 .interp_newpath = NULL, 1127 .brand_note = &linux32_brandnote, 1128 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1129}; 1130 1131static Elf32_Brandinfo linux_glibc2brand = { 1132 .brand = ELFOSABI_LINUX, 1133 .machine = EM_386, 1134 .compat_3_brand = "Linux", 1135 .emul_path = "/compat/linux", 1136 .interp_path = "/lib/ld-linux.so.2", 1137 .sysvec = &elf_linux_sysvec, 1138 .interp_newpath = NULL, 1139 .brand_note = &linux32_brandnote, 1140 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1141}; 1142 1143Elf32_Brandinfo *linux_brandlist[] = { 1144 &linux_brand, 1145 &linux_glibc2brand, 1146 NULL 1147}; 1148 1149static int 1150linux_elf_modevent(module_t mod, int type, void *data) 1151{ 1152 Elf32_Brandinfo **brandinfo; 1153 int error; 1154 struct linux_ioctl_handler **lihp;
| 983static void 984linux32_fixlimit(struct rlimit *rl, int which) 985{ 986 987 switch (which) { 988 case RLIMIT_DATA: 989 if (linux32_maxdsiz != 0) { 990 if (rl->rlim_cur > linux32_maxdsiz) 991 rl->rlim_cur = linux32_maxdsiz; 992 if (rl->rlim_max > linux32_maxdsiz) 993 rl->rlim_max = linux32_maxdsiz; 994 } 995 break; 996 case RLIMIT_STACK: 997 if (linux32_maxssiz != 0) { 998 if (rl->rlim_cur > linux32_maxssiz) 999 rl->rlim_cur = linux32_maxssiz; 1000 if (rl->rlim_max > linux32_maxssiz) 1001 rl->rlim_max = linux32_maxssiz; 1002 } 1003 break; 1004 case RLIMIT_VMEM: 1005 if (linux32_maxvmem != 0) { 1006 if (rl->rlim_cur > linux32_maxvmem) 1007 rl->rlim_cur = linux32_maxvmem; 1008 if (rl->rlim_max > linux32_maxvmem) 1009 rl->rlim_max = linux32_maxvmem; 1010 } 1011 break; 1012 } 1013} 1014 1015struct sysentvec elf_linux_sysvec = { 1016 .sv_size = LINUX_SYS_MAXSYSCALL, 1017 .sv_table = linux_sysent, 1018 .sv_mask = 0, 1019 .sv_sigsize = LINUX_SIGTBLSZ, 1020 .sv_sigtbl = bsd_to_linux_signal, 1021 .sv_errsize = ELAST + 1, 1022 .sv_errtbl = bsd_to_linux_errno, 1023 .sv_transtrap = translate_traps, 1024 .sv_fixup = elf_linux_fixup, 1025 .sv_sendsig = linux_sendsig, 1026 .sv_sigcode = &_binary_linux32_locore_o_start, 1027 .sv_szsigcode = &linux_szsigcode, 1028 .sv_prepsyscall = NULL, 1029 .sv_name = "Linux ELF32", 1030 .sv_coredump = elf32_coredump, 1031 .sv_imgact_try = exec_linux_imgact_try, 1032 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1033 .sv_pagesize = PAGE_SIZE, 1034 .sv_minuser = VM_MIN_ADDRESS, 1035 .sv_maxuser = LINUX32_MAXUSER, 1036 .sv_usrstack = LINUX32_USRSTACK, 1037 .sv_psstrings = LINUX32_PS_STRINGS, 1038 .sv_stackprot = VM_PROT_ALL, 1039 .sv_copyout_strings = linux_copyout_strings, 1040 .sv_setregs = exec_linux_setregs, 1041 .sv_fixlimit = linux32_fixlimit, 1042 .sv_maxssiz = &linux32_maxssiz, 1043 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1044 .sv_set_syscall_retval = cpu_set_syscall_retval, 1045 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1046 .sv_syscallnames = NULL, 1047 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1048 .sv_shared_page_len = PAGE_SIZE, 1049 .sv_schedtail = linux_schedtail, 1050 .sv_thread_detach = linux_thread_detach, 1051}; 1052 1053static void 1054linux_vdso_install(void *param) 1055{ 1056 1057 linux_szsigcode = (&_binary_linux32_locore_o_end - 1058 &_binary_linux32_locore_o_start); 1059 1060 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1061 panic("Linux invalid vdso size\n"); 1062 1063 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1064 1065 linux_shared_page_obj = __elfN(linux_shared_page_init) 1066 (&linux_shared_page_mapping); 1067 1068 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); 1069 1070 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1071 linux_szsigcode); 1072 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1073 1074 linux_kplatform = linux_shared_page_mapping + 1075 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE); 1076} 1077SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1078 (sysinit_cfunc_t)linux_vdso_install, NULL); 1079 1080static void 1081linux_vdso_deinstall(void *param) 1082{ 1083 1084 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1085}; 1086SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1087 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1088 1089static char GNU_ABI_VENDOR[] = "GNU"; 1090static int GNULINUX_ABI_DESC = 0; 1091 1092static boolean_t 1093linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1094{ 1095 const Elf32_Word *desc; 1096 uintptr_t p; 1097 1098 p = (uintptr_t)(note + 1); 1099 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1100 1101 desc = (const Elf32_Word *)p; 1102 if (desc[0] != GNULINUX_ABI_DESC) 1103 return (FALSE); 1104 1105 /* 1106 * For linux we encode osrel as follows (see linux_mib.c): 1107 * VVVMMMIII (version, major, minor), see linux_mib.c. 1108 */ 1109 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1110 1111 return (TRUE); 1112} 1113 1114static Elf_Brandnote linux32_brandnote = { 1115 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1116 .hdr.n_descsz = 16, /* XXX at least 16 */ 1117 .hdr.n_type = 1, 1118 .vendor = GNU_ABI_VENDOR, 1119 .flags = BN_TRANSLATE_OSREL, 1120 .trans_osrel = linux32_trans_osrel 1121}; 1122 1123static Elf32_Brandinfo linux_brand = { 1124 .brand = ELFOSABI_LINUX, 1125 .machine = EM_386, 1126 .compat_3_brand = "Linux", 1127 .emul_path = "/compat/linux", 1128 .interp_path = "/lib/ld-linux.so.1", 1129 .sysvec = &elf_linux_sysvec, 1130 .interp_newpath = NULL, 1131 .brand_note = &linux32_brandnote, 1132 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1133}; 1134 1135static Elf32_Brandinfo linux_glibc2brand = { 1136 .brand = ELFOSABI_LINUX, 1137 .machine = EM_386, 1138 .compat_3_brand = "Linux", 1139 .emul_path = "/compat/linux", 1140 .interp_path = "/lib/ld-linux.so.2", 1141 .sysvec = &elf_linux_sysvec, 1142 .interp_newpath = NULL, 1143 .brand_note = &linux32_brandnote, 1144 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1145}; 1146 1147Elf32_Brandinfo *linux_brandlist[] = { 1148 &linux_brand, 1149 &linux_glibc2brand, 1150 NULL 1151}; 1152 1153static int 1154linux_elf_modevent(module_t mod, int type, void *data) 1155{ 1156 Elf32_Brandinfo **brandinfo; 1157 int error; 1158 struct linux_ioctl_handler **lihp;
|