linux_sysvec.c revision 191966
/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 191966 2009-05-10 18:16:07Z dchagin $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/exec.h> 35#include <sys/fcntl.h> 36#include <sys/imgact.h> 37#include <sys/imgact_aout.h> 38#include <sys/imgact_elf.h> 39#include <sys/kernel.h> 40#include <sys/lock.h> 41#include <sys/malloc.h> 42#include <sys/module.h> 43#include <sys/mutex.h> 44#include <sys/proc.h> 45#include <sys/signalvar.h> 46#include <sys/syscallsubr.h> 47#include <sys/sysent.h> 48#include <sys/sysproto.h> 49#include <sys/vnode.h> 50#include <sys/eventhandler.h> 51 52#include <vm/vm.h> 53#include <vm/pmap.h> 54#include <vm/vm_extern.h> 55#include <vm/vm_map.h> 56#include <vm/vm_object.h> 57#include <vm/vm_page.h> 58#include <vm/vm_param.h> 59 60#include <machine/cpu.h> 61#include <machine/cputypes.h> 62#include <machine/md_var.h> 63#include <machine/pcb.h> 64 65#include <i386/linux/linux.h> 66#include <i386/linux/linux_proto.h> 67#include <compat/linux/linux_futex.h> 68#include <compat/linux/linux_emul.h> 69#include <compat/linux/linux_mib.h> 70#include <compat/linux/linux_misc.h> 71#include <compat/linux/linux_signal.h> 72#include <compat/linux/linux_util.h> 73 74MODULE_VERSION(linux, 1); 75 76MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 77 78#if BYTE_ORDER == LITTLE_ENDIAN 79#define SHELLMAGIC 0x2123 /* #! */ 80#else 81#define SHELLMAGIC 0x2321 82#endif 83 84/* 85 * Allow the sendsig functions to use the ldebug() facility 86 * even though they are not syscalls themselves. Map them 87 * to syscall 0. This is slightly less bogus than using 88 * ldebug(sigreturn). 
89 */ 90#define LINUX_SYS_linux_rt_sendsig 0 91#define LINUX_SYS_linux_sendsig 0 92 93extern char linux_sigcode[]; 94extern int linux_szsigcode; 95 96extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 97 98SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 99SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 100 101static int linux_fixup(register_t **stack_base, 102 struct image_params *iparams); 103static int elf_linux_fixup(register_t **stack_base, 104 struct image_params *iparams); 105static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 106 caddr_t *params); 107static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 108static void exec_linux_setregs(struct thread *td, u_long entry, 109 u_long stack, u_long ps_strings); 110static register_t *linux_copyout_strings(struct image_params *imgp); 111 112static int linux_szplatform; 113const char *linux_platform; 114 115static eventhandler_tag linux_exit_tag; 116static eventhandler_tag linux_schedtail_tag; 117static eventhandler_tag linux_exec_tag; 118 119/* 120 * Linux syscalls return negative errno's, we do positive and map them 121 * Reference: 122 * FreeBSD: src/sys/sys/errno.h 123 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 124 * linux-2.6.17.8/include/asm-generic/errno.h 125 */ 126static int bsd_to_linux_errno[ELAST + 1] = { 127 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 128 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 129 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 130 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 131 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 132 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 133 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 134 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 135 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 136 -72, -67, -71 137}; 138 139int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 140 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 
141 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 142 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 143 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 144 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 145 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 146 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 147 0, LINUX_SIGUSR1, LINUX_SIGUSR2 148}; 149 150int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 151 SIGHUP, SIGINT, SIGQUIT, SIGILL, 152 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 153 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 154 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 155 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 156 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 157 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 158 SIGIO, SIGURG, SIGSYS 159}; 160 161#define LINUX_T_UNKNOWN 255 162static int _bsd_to_linux_trapcode[] = { 163 LINUX_T_UNKNOWN, /* 0 */ 164 6, /* 1 T_PRIVINFLT */ 165 LINUX_T_UNKNOWN, /* 2 */ 166 3, /* 3 T_BPTFLT */ 167 LINUX_T_UNKNOWN, /* 4 */ 168 LINUX_T_UNKNOWN, /* 5 */ 169 16, /* 6 T_ARITHTRAP */ 170 254, /* 7 T_ASTFLT */ 171 LINUX_T_UNKNOWN, /* 8 */ 172 13, /* 9 T_PROTFLT */ 173 1, /* 10 T_TRCTRAP */ 174 LINUX_T_UNKNOWN, /* 11 */ 175 14, /* 12 T_PAGEFLT */ 176 LINUX_T_UNKNOWN, /* 13 */ 177 17, /* 14 T_ALIGNFLT */ 178 LINUX_T_UNKNOWN, /* 15 */ 179 LINUX_T_UNKNOWN, /* 16 */ 180 LINUX_T_UNKNOWN, /* 17 */ 181 0, /* 18 T_DIVIDE */ 182 2, /* 19 T_NMI */ 183 4, /* 20 T_OFLOW */ 184 5, /* 21 T_BOUND */ 185 7, /* 22 T_DNA */ 186 8, /* 23 T_DOUBLEFLT */ 187 9, /* 24 T_FPOPFLT */ 188 10, /* 25 T_TSSFLT */ 189 11, /* 26 T_SEGNPFLT */ 190 12, /* 27 T_STKFLT */ 191 18, /* 28 T_MCHK */ 192 19, /* 29 T_XMMFLT */ 193 15 /* 30 T_RESERVED */ 194}; 195#define bsd_to_linux_trapcode(code) \ 196 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 197 _bsd_to_linux_trapcode[(code)]: \ 198 LINUX_T_UNKNOWN) 199 200/* 201 * If FreeBSD & Linux have a difference of opinion about what a trap 202 * means, deal with it here. 
203 * 204 * MPSAFE 205 */ 206static int 207translate_traps(int signal, int trap_code) 208{ 209 if (signal != SIGBUS) 210 return signal; 211 switch (trap_code) { 212 case T_PROTFLT: 213 case T_TSSFLT: 214 case T_DOUBLEFLT: 215 case T_PAGEFLT: 216 return SIGSEGV; 217 default: 218 return signal; 219 } 220} 221 222static int 223linux_fixup(register_t **stack_base, struct image_params *imgp) 224{ 225 register_t *argv, *envp; 226 227 argv = *stack_base; 228 envp = *stack_base + (imgp->args->argc + 1); 229 (*stack_base)--; 230 **stack_base = (intptr_t)(void *)envp; 231 (*stack_base)--; 232 **stack_base = (intptr_t)(void *)argv; 233 (*stack_base)--; 234 **stack_base = imgp->args->argc; 235 return (0); 236} 237 238static int 239elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 240{ 241 struct proc *p; 242 Elf32_Auxargs *args; 243 Elf32_Addr *uplatform; 244 struct ps_strings *arginfo; 245 register_t *pos; 246 247 KASSERT(curthread->td_proc == imgp->proc, 248 ("unsafe elf_linux_fixup(), should be curproc")); 249 250 p = imgp->proc; 251 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 252 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode - 253 linux_szplatform); 254 args = (Elf32_Auxargs *)imgp->auxargs; 255 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 256 257 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); 258 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); 259 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 260 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 261 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 262 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 263 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 264 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 265 AUXARGS_ENTRY(pos, AT_BASE, args->base); 266 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0); 267 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 268 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 269 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 270 
AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 271 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 272 if (args->execfd != -1) 273 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 274 AUXARGS_ENTRY(pos, AT_NULL, 0); 275 276 free(imgp->auxargs, M_TEMP); 277 imgp->auxargs = NULL; 278 279 (*stack_base)--; 280 **stack_base = (register_t)imgp->args->argc; 281 return (0); 282} 283 284/* 285 * Copied from kern/kern_exec.c 286 */ 287static register_t * 288linux_copyout_strings(struct image_params *imgp) 289{ 290 int argc, envc; 291 char **vectp; 292 char *stringp, *destp; 293 register_t *stack_base; 294 struct ps_strings *arginfo; 295 struct proc *p; 296 297 /* 298 * Calculate string base and vector table pointers. 299 * Also deal with signal trampoline code for this exec type. 300 */ 301 p = imgp->proc; 302 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 303 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE - 304 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace), 305 sizeof(char *)); 306 307 /* 308 * install sigcode 309 */ 310 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo - 311 linux_szsigcode), linux_szsigcode); 312 313 /* 314 * install LINUX_PLATFORM 315 */ 316 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode - 317 linux_szplatform), linux_szplatform); 318 319 /* 320 * If we have a valid auxargs ptr, prepare some room 321 * on the stack. 322 */ 323 if (imgp->auxargs) { 324 /* 325 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 326 * lower compatibility. 327 */ 328 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 329 (LINUX_AT_COUNT * 2); 330 /* 331 * The '+ 2' is for the null pointers at the end of each of 332 * the arg and env vector sets,and imgp->auxarg_size is room 333 * for argument of Runtime loader. 
334 */ 335 vectp = (char **)(destp - (imgp->args->argc + 336 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); 337 } else { 338 /* 339 * The '+ 2' is for the null pointers at the end of each of 340 * the arg and env vector sets 341 */ 342 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * 343 sizeof(char *)); 344 } 345 346 /* 347 * vectp also becomes our initial stack base 348 */ 349 stack_base = (register_t *)vectp; 350 351 stringp = imgp->args->begin_argv; 352 argc = imgp->args->argc; 353 envc = imgp->args->envc; 354 355 /* 356 * Copy out strings - arguments and environment. 357 */ 358 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 359 360 /* 361 * Fill in "ps_strings" struct for ps, w, etc. 362 */ 363 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 364 suword(&arginfo->ps_nargvstr, argc); 365 366 /* 367 * Fill in argument portion of vector table. 368 */ 369 for (; argc > 0; --argc) { 370 suword(vectp++, (long)(intptr_t)destp); 371 while (*stringp++ != 0) 372 destp++; 373 destp++; 374 } 375 376 /* a null vector table pointer separates the argp's from the envp's */ 377 suword(vectp++, 0); 378 379 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 380 suword(&arginfo->ps_nenvstr, envc); 381 382 /* 383 * Fill in environment portion of vector table. 
384 */ 385 for (; envc > 0; --envc) { 386 suword(vectp++, (long)(intptr_t)destp); 387 while (*stringp++ != 0) 388 destp++; 389 destp++; 390 } 391 392 /* end of vector table is a null pointer */ 393 suword(vectp, 0); 394 395 return (stack_base); 396} 397 398 399 400extern int _ucodesel, _udatasel; 401extern unsigned long linux_sznonrtsigcode; 402 403static void 404linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 405{ 406 struct thread *td = curthread; 407 struct proc *p = td->td_proc; 408 struct sigacts *psp; 409 struct trapframe *regs; 410 struct l_rt_sigframe *fp, frame; 411 int sig, code; 412 int oonstack; 413 414 sig = ksi->ksi_signo; 415 code = ksi->ksi_code; 416 PROC_LOCK_ASSERT(p, MA_OWNED); 417 psp = p->p_sigacts; 418 mtx_assert(&psp->ps_mtx, MA_OWNED); 419 regs = td->td_frame; 420 oonstack = sigonstack(regs->tf_esp); 421 422#ifdef DEBUG 423 if (ldebug(rt_sendsig)) 424 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 425 catcher, sig, (void*)mask, code); 426#endif 427 /* 428 * Allocate space for the signal handler context. 429 */ 430 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 431 SIGISMEMBER(psp->ps_sigonstack, sig)) { 432 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 433 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 434 } else 435 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 436 mtx_unlock(&psp->ps_mtx); 437 438 /* 439 * Build the argument list for the signal handler. 440 */ 441 if (p->p_sysent->sv_sigtbl) 442 if (sig <= p->p_sysent->sv_sigsize) 443 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 444 445 bzero(&frame, sizeof(frame)); 446 447 frame.sf_handler = catcher; 448 frame.sf_sig = sig; 449 frame.sf_siginfo = &fp->sf_si; 450 frame.sf_ucontext = &fp->sf_sc; 451 452 /* Fill in POSIX parts */ 453 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 454 455 /* 456 * Build the signal context to be used by sigreturn. 457 */ 458 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 459 frame.sf_sc.uc_link = NULL; /* XXX ??? 
*/ 460 461 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 462 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 463 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 464 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 465 PROC_UNLOCK(p); 466 467 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 468 469 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 470 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 471 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 472 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 473 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 474 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 475 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 476 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 477 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 478 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 479 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 480 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 481 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 482 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 483 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 484 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 485 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 486 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 487 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 488 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 489 490#ifdef DEBUG 491 if (ldebug(rt_sendsig)) 492 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 493 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 494 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 495#endif 496 497 if (copyout(&frame, fp, sizeof(frame)) != 0) { 498 /* 499 * Process has trashed its stack; give it an illegal 500 * instruction to halt it in its tracks. 
501 */ 502#ifdef DEBUG 503 if (ldebug(rt_sendsig)) 504 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 505 fp, oonstack); 506#endif 507 PROC_LOCK(p); 508 sigexit(td, SIGILL); 509 } 510 511 /* 512 * Build context to run handler in. 513 */ 514 regs->tf_esp = (int)fp; 515 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 516 linux_sznonrtsigcode; 517 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 518 regs->tf_cs = _ucodesel; 519 regs->tf_ds = _udatasel; 520 regs->tf_es = _udatasel; 521 regs->tf_fs = _udatasel; 522 regs->tf_ss = _udatasel; 523 PROC_LOCK(p); 524 mtx_lock(&psp->ps_mtx); 525} 526 527 528/* 529 * Send an interrupt to process. 530 * 531 * Stack is set up to allow sigcode stored 532 * in u. to call routine, followed by kcall 533 * to sigreturn routine below. After sigreturn 534 * resets the signal mask, the stack, and the 535 * frame pointer, it returns to the user 536 * specified pc, psl. 537 */ 538static void 539linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 540{ 541 struct thread *td = curthread; 542 struct proc *p = td->td_proc; 543 struct sigacts *psp; 544 struct trapframe *regs; 545 struct l_sigframe *fp, frame; 546 l_sigset_t lmask; 547 int sig, code; 548 int oonstack, i; 549 550 PROC_LOCK_ASSERT(p, MA_OWNED); 551 psp = p->p_sigacts; 552 sig = ksi->ksi_signo; 553 code = ksi->ksi_code; 554 mtx_assert(&psp->ps_mtx, MA_OWNED); 555 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 556 /* Signal handler installed with SA_SIGINFO. */ 557 linux_rt_sendsig(catcher, ksi, mask); 558 return; 559 } 560 regs = td->td_frame; 561 oonstack = sigonstack(regs->tf_esp); 562 563#ifdef DEBUG 564 if (ldebug(sendsig)) 565 printf(ARGS(sendsig, "%p, %d, %p, %u"), 566 catcher, sig, (void*)mask, code); 567#endif 568 569 /* 570 * Allocate space for the signal handler context. 
571 */ 572 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 573 SIGISMEMBER(psp->ps_sigonstack, sig)) { 574 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 575 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 576 } else 577 fp = (struct l_sigframe *)regs->tf_esp - 1; 578 mtx_unlock(&psp->ps_mtx); 579 PROC_UNLOCK(p); 580 581 /* 582 * Build the argument list for the signal handler. 583 */ 584 if (p->p_sysent->sv_sigtbl) 585 if (sig <= p->p_sysent->sv_sigsize) 586 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 587 588 bzero(&frame, sizeof(frame)); 589 590 frame.sf_handler = catcher; 591 frame.sf_sig = sig; 592 593 bsd_to_linux_sigset(mask, &lmask); 594 595 /* 596 * Build the signal context to be used by sigreturn. 597 */ 598 frame.sf_sc.sc_mask = lmask.__bits[0]; 599 frame.sf_sc.sc_gs = rgs(); 600 frame.sf_sc.sc_fs = regs->tf_fs; 601 frame.sf_sc.sc_es = regs->tf_es; 602 frame.sf_sc.sc_ds = regs->tf_ds; 603 frame.sf_sc.sc_edi = regs->tf_edi; 604 frame.sf_sc.sc_esi = regs->tf_esi; 605 frame.sf_sc.sc_ebp = regs->tf_ebp; 606 frame.sf_sc.sc_ebx = regs->tf_ebx; 607 frame.sf_sc.sc_edx = regs->tf_edx; 608 frame.sf_sc.sc_ecx = regs->tf_ecx; 609 frame.sf_sc.sc_eax = regs->tf_eax; 610 frame.sf_sc.sc_eip = regs->tf_eip; 611 frame.sf_sc.sc_cs = regs->tf_cs; 612 frame.sf_sc.sc_eflags = regs->tf_eflags; 613 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 614 frame.sf_sc.sc_ss = regs->tf_ss; 615 frame.sf_sc.sc_err = regs->tf_err; 616 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 617 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 618 619 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 620 frame.sf_extramask[i] = lmask.__bits[i+1]; 621 622 if (copyout(&frame, fp, sizeof(frame)) != 0) { 623 /* 624 * Process has trashed its stack; give it an illegal 625 * instruction to halt it in its tracks. 626 */ 627 PROC_LOCK(p); 628 sigexit(td, SIGILL); 629 } 630 631 /* 632 * Build context to run handler in. 
633 */ 634 regs->tf_esp = (int)fp; 635 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 636 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 637 regs->tf_cs = _ucodesel; 638 regs->tf_ds = _udatasel; 639 regs->tf_es = _udatasel; 640 regs->tf_fs = _udatasel; 641 regs->tf_ss = _udatasel; 642 PROC_LOCK(p); 643 mtx_lock(&psp->ps_mtx); 644} 645 646/* 647 * System call to cleanup state after a signal 648 * has been taken. Reset signal mask and 649 * stack state from context left by sendsig (above). 650 * Return to previous pc and psl as specified by 651 * context left by sendsig. Check carefully to 652 * make sure that the user has not modified the 653 * psl to gain improper privileges or to cause 654 * a machine fault. 655 */ 656int 657linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 658{ 659 struct proc *p = td->td_proc; 660 struct l_sigframe frame; 661 struct trapframe *regs; 662 l_sigset_t lmask; 663 int eflags, i; 664 ksiginfo_t ksi; 665 666 regs = td->td_frame; 667 668#ifdef DEBUG 669 if (ldebug(sigreturn)) 670 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 671#endif 672 /* 673 * The trampoline code hands us the sigframe. 674 * It is unsafe to keep track of it ourselves, in the event that a 675 * program jumps out of a signal handler. 676 */ 677 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 678 return (EFAULT); 679 680 /* 681 * Check for security violations. 682 */ 683#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 684 eflags = frame.sf_sc.sc_eflags; 685 /* 686 * XXX do allow users to change the privileged flag PSL_RF. The 687 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 688 * sometimes set it there too. tf_eflags is kept in the signal 689 * context during signal handling and there is no other place 690 * to remember it, so the PSL_RF bit may be corrupted by the 691 * signal handler without us knowing. 
Corruption of the PSL_RF 692 * bit at worst causes one more or one less debugger trap, so 693 * allowing it is fairly harmless. 694 */ 695 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 696 return(EINVAL); 697 698 /* 699 * Don't allow users to load a valid privileged %cs. Let the 700 * hardware check for invalid selectors, excess privilege in 701 * other selectors, invalid %eip's and invalid %esp's. 702 */ 703#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 704 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 705 ksiginfo_init_trap(&ksi); 706 ksi.ksi_signo = SIGBUS; 707 ksi.ksi_code = BUS_OBJERR; 708 ksi.ksi_trapno = T_PROTFLT; 709 ksi.ksi_addr = (void *)regs->tf_eip; 710 trapsignal(td, &ksi); 711 return(EINVAL); 712 } 713 714 lmask.__bits[0] = frame.sf_sc.sc_mask; 715 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 716 lmask.__bits[i+1] = frame.sf_extramask[i]; 717 PROC_LOCK(p); 718 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 719 SIG_CANTMASK(td->td_sigmask); 720 signotify(td); 721 PROC_UNLOCK(p); 722 723 /* 724 * Restore signal context. 725 */ 726 /* %gs was restored by the trampoline. */ 727 regs->tf_fs = frame.sf_sc.sc_fs; 728 regs->tf_es = frame.sf_sc.sc_es; 729 regs->tf_ds = frame.sf_sc.sc_ds; 730 regs->tf_edi = frame.sf_sc.sc_edi; 731 regs->tf_esi = frame.sf_sc.sc_esi; 732 regs->tf_ebp = frame.sf_sc.sc_ebp; 733 regs->tf_ebx = frame.sf_sc.sc_ebx; 734 regs->tf_edx = frame.sf_sc.sc_edx; 735 regs->tf_ecx = frame.sf_sc.sc_ecx; 736 regs->tf_eax = frame.sf_sc.sc_eax; 737 regs->tf_eip = frame.sf_sc.sc_eip; 738 regs->tf_cs = frame.sf_sc.sc_cs; 739 regs->tf_eflags = eflags; 740 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 741 regs->tf_ss = frame.sf_sc.sc_ss; 742 743 return (EJUSTRETURN); 744} 745 746/* 747 * System call to cleanup state after a signal 748 * has been taken. Reset signal mask and 749 * stack state from context left by rt_sendsig (above). 750 * Return to previous pc and psl as specified by 751 * context left by sendsig. 
Check carefully to 752 * make sure that the user has not modified the 753 * psl to gain improper privileges or to cause 754 * a machine fault. 755 */ 756int 757linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 758{ 759 struct proc *p = td->td_proc; 760 struct l_ucontext uc; 761 struct l_sigcontext *context; 762 l_stack_t *lss; 763 stack_t ss; 764 struct trapframe *regs; 765 int eflags; 766 ksiginfo_t ksi; 767 768 regs = td->td_frame; 769 770#ifdef DEBUG 771 if (ldebug(rt_sigreturn)) 772 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 773#endif 774 /* 775 * The trampoline code hands us the ucontext. 776 * It is unsafe to keep track of it ourselves, in the event that a 777 * program jumps out of a signal handler. 778 */ 779 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 780 return (EFAULT); 781 782 context = &uc.uc_mcontext; 783 784 /* 785 * Check for security violations. 786 */ 787#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 788 eflags = context->sc_eflags; 789 /* 790 * XXX do allow users to change the privileged flag PSL_RF. The 791 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 792 * sometimes set it there too. tf_eflags is kept in the signal 793 * context during signal handling and there is no other place 794 * to remember it, so the PSL_RF bit may be corrupted by the 795 * signal handler without us knowing. Corruption of the PSL_RF 796 * bit at worst causes one more or one less debugger trap, so 797 * allowing it is fairly harmless. 798 */ 799 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 800 return(EINVAL); 801 802 /* 803 * Don't allow users to load a valid privileged %cs. Let the 804 * hardware check for invalid selectors, excess privilege in 805 * other selectors, invalid %eip's and invalid %esp's. 
806 */ 807#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 808 if (!CS_SECURE(context->sc_cs)) { 809 ksiginfo_init_trap(&ksi); 810 ksi.ksi_signo = SIGBUS; 811 ksi.ksi_code = BUS_OBJERR; 812 ksi.ksi_trapno = T_PROTFLT; 813 ksi.ksi_addr = (void *)regs->tf_eip; 814 trapsignal(td, &ksi); 815 return(EINVAL); 816 } 817 818 PROC_LOCK(p); 819 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 820 SIG_CANTMASK(td->td_sigmask); 821 signotify(td); 822 PROC_UNLOCK(p); 823 824 /* 825 * Restore signal context 826 */ 827 /* %gs was restored by the trampoline. */ 828 regs->tf_fs = context->sc_fs; 829 regs->tf_es = context->sc_es; 830 regs->tf_ds = context->sc_ds; 831 regs->tf_edi = context->sc_edi; 832 regs->tf_esi = context->sc_esi; 833 regs->tf_ebp = context->sc_ebp; 834 regs->tf_ebx = context->sc_ebx; 835 regs->tf_edx = context->sc_edx; 836 regs->tf_ecx = context->sc_ecx; 837 regs->tf_eax = context->sc_eax; 838 regs->tf_eip = context->sc_eip; 839 regs->tf_cs = context->sc_cs; 840 regs->tf_eflags = eflags; 841 regs->tf_esp = context->sc_esp_at_signal; 842 regs->tf_ss = context->sc_ss; 843 844 /* 845 * call sigaltstack & ignore results.. 846 */ 847 lss = &uc.uc_stack; 848 ss.ss_sp = lss->ss_sp; 849 ss.ss_size = lss->ss_size; 850 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 851 852#ifdef DEBUG 853 if (ldebug(rt_sigreturn)) 854 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 855 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 856#endif 857 (void)kern_sigaltstack(td, &ss, NULL); 858 859 return (EJUSTRETURN); 860} 861 862/* 863 * MPSAFE 864 */ 865static void 866linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 867{ 868 args[0] = tf->tf_ebx; 869 args[1] = tf->tf_ecx; 870 args[2] = tf->tf_edx; 871 args[3] = tf->tf_esi; 872 args[4] = tf->tf_edi; 873 args[5] = tf->tf_ebp; /* Unconfirmed */ 874 *params = NULL; /* no copyin */ 875} 876 877/* 878 * If a linux binary is exec'ing something, try this image activator 879 * first. 
We override standard shell script execution in order to 880 * be able to modify the interpreter path. We only do this if a linux 881 * binary is doing the exec, so we do not create an EXEC module for it. 882 */ 883static int exec_linux_imgact_try(struct image_params *iparams); 884 885static int 886exec_linux_imgact_try(struct image_params *imgp) 887{ 888 const char *head = (const char *)imgp->image_header; 889 char *rpath; 890 int error = -1, len; 891 892 /* 893 * The interpreter for shell scripts run from a linux binary needs 894 * to be located in /compat/linux if possible in order to recursively 895 * maintain linux path emulation. 896 */ 897 if (((const short *)head)[0] == SHELLMAGIC) { 898 /* 899 * Run our normal shell image activator. If it succeeds attempt 900 * to use the alternate path for the interpreter. If an alternate 901 * path is found, use our stringspace to store it. 902 */ 903 if ((error = exec_shell_imgact(imgp)) == 0) { 904 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 905 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); 906 if (rpath != NULL) { 907 len = strlen(rpath) + 1; 908 909 if (len <= MAXSHELLCMDLEN) { 910 memcpy(imgp->interpreter_name, rpath, len); 911 } 912 free(rpath, M_TEMP); 913 } 914 } 915 } 916 return(error); 917} 918 919/* 920 * exec_setregs may initialize some registers differently than Linux 921 * does, thus potentially confusing Linux binaries. If necessary, we 922 * override the exec_setregs default(s) here. 
923 */ 924static void 925exec_linux_setregs(struct thread *td, u_long entry, 926 u_long stack, u_long ps_strings) 927{ 928 struct pcb *pcb = td->td_pcb; 929 930 exec_setregs(td, entry, stack, ps_strings); 931 932 /* Linux sets %gs to 0, we default to _udatasel */ 933 pcb->pcb_gs = 0; 934 load_gs(0); 935 936 pcb->pcb_initial_npxcw = __LINUX_NPXCW__; 937} 938 939static void 940linux_get_machine(const char **dst) 941{ 942 943 switch (cpu_class) { 944 case CPUCLASS_686: 945 *dst = "i686"; 946 break; 947 case CPUCLASS_586: 948 *dst = "i586"; 949 break; 950 case CPUCLASS_486: 951 *dst = "i486"; 952 break; 953 default: 954 *dst = "i386"; 955 } 956} 957 958struct sysentvec linux_sysvec = { 959 .sv_size = LINUX_SYS_MAXSYSCALL, 960 .sv_table = linux_sysent, 961 .sv_mask = 0, 962 .sv_sigsize = LINUX_SIGTBLSZ, 963 .sv_sigtbl = bsd_to_linux_signal, 964 .sv_errsize = ELAST + 1, 965 .sv_errtbl = bsd_to_linux_errno, 966 .sv_transtrap = translate_traps, 967 .sv_fixup = linux_fixup, 968 .sv_sendsig = linux_sendsig, 969 .sv_sigcode = linux_sigcode, 970 .sv_szsigcode = &linux_szsigcode, 971 .sv_prepsyscall = linux_prepsyscall, 972 .sv_name = "Linux a.out", 973 .sv_coredump = NULL, 974 .sv_imgact_try = exec_linux_imgact_try, 975 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 976 .sv_pagesize = PAGE_SIZE, 977 .sv_minuser = VM_MIN_ADDRESS, 978 .sv_maxuser = VM_MAXUSER_ADDRESS, 979 .sv_usrstack = USRSTACK, 980 .sv_psstrings = PS_STRINGS, 981 .sv_stackprot = VM_PROT_ALL, 982 .sv_copyout_strings = exec_copyout_strings, 983 .sv_setregs = exec_linux_setregs, 984 .sv_fixlimit = NULL, 985 .sv_maxssiz = NULL, 986 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32 987}; 988 989struct sysentvec elf_linux_sysvec = { 990 .sv_size = LINUX_SYS_MAXSYSCALL, 991 .sv_table = linux_sysent, 992 .sv_mask = 0, 993 .sv_sigsize = LINUX_SIGTBLSZ, 994 .sv_sigtbl = bsd_to_linux_signal, 995 .sv_errsize = ELAST + 1, 996 .sv_errtbl = bsd_to_linux_errno, 997 .sv_transtrap = translate_traps, 998 .sv_fixup = elf_linux_fixup, 
999 .sv_sendsig = linux_sendsig, 1000 .sv_sigcode = linux_sigcode, 1001 .sv_szsigcode = &linux_szsigcode, 1002 .sv_prepsyscall = linux_prepsyscall, 1003 .sv_name = "Linux ELF", 1004 .sv_coredump = elf32_coredump, 1005 .sv_imgact_try = exec_linux_imgact_try, 1006 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1007 .sv_pagesize = PAGE_SIZE, 1008 .sv_minuser = VM_MIN_ADDRESS, 1009 .sv_maxuser = VM_MAXUSER_ADDRESS, 1010 .sv_usrstack = USRSTACK, 1011 .sv_psstrings = PS_STRINGS, 1012 .sv_stackprot = VM_PROT_ALL, 1013 .sv_copyout_strings = linux_copyout_strings, 1014 .sv_setregs = exec_linux_setregs, 1015 .sv_fixlimit = NULL, 1016 .sv_maxssiz = NULL, 1017 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 1018}; 1019 1020static char GNULINUX_ABI_VENDOR[] = "GNU"; 1021 1022static Elf_Brandnote linux_brandnote = { 1023 .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR), 1024 .hdr.n_descsz = 16, 1025 .hdr.n_type = 1, 1026 .vendor = GNULINUX_ABI_VENDOR, 1027 .flags = 0 1028}; 1029 1030static Elf32_Brandinfo linux_brand = { 1031 .brand = ELFOSABI_LINUX, 1032 .machine = EM_386, 1033 .compat_3_brand = "Linux", 1034 .emul_path = "/compat/linux", 1035 .interp_path = "/lib/ld-linux.so.1", 1036 .sysvec = &elf_linux_sysvec, 1037 .interp_newpath = NULL, 1038 .brand_note = &linux_brandnote, 1039 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1040}; 1041 1042static Elf32_Brandinfo linux_glibc2brand = { 1043 .brand = ELFOSABI_LINUX, 1044 .machine = EM_386, 1045 .compat_3_brand = "Linux", 1046 .emul_path = "/compat/linux", 1047 .interp_path = "/lib/ld-linux.so.2", 1048 .sysvec = &elf_linux_sysvec, 1049 .interp_newpath = NULL, 1050 .brand_note = &linux_brandnote, 1051 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1052}; 1053 1054Elf32_Brandinfo *linux_brandlist[] = { 1055 &linux_brand, 1056 &linux_glibc2brand, 1057 NULL 1058}; 1059 1060static int 1061linux_elf_modevent(module_t mod, int type, void *data) 1062{ 1063 Elf32_Brandinfo **brandinfo; 1064 int error; 1065 struct linux_ioctl_handler **lihp; 1066 struct 
linux_device_handler **ldhp; 1067 1068 error = 0; 1069 1070 switch(type) { 1071 case MOD_LOAD: 1072 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1073 ++brandinfo) 1074 if (elf32_insert_brand_entry(*brandinfo) < 0) 1075 error = EINVAL; 1076 if (error == 0) { 1077 SET_FOREACH(lihp, linux_ioctl_handler_set) 1078 linux_ioctl_register_handler(*lihp); 1079 SET_FOREACH(ldhp, linux_device_handler_set) 1080 linux_device_register_handler(*ldhp); 1081 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 1082 sx_init(&emul_shared_lock, "emuldata->shared lock"); 1083 LIST_INIT(&futex_list); 1084 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1085 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 1086 NULL, 1000); 1087 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail, 1088 NULL, 1000); 1089 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 1090 NULL, 1000); 1091 linux_get_machine(&linux_platform); 1092 linux_szplatform = roundup(strlen(linux_platform) + 1, 1093 sizeof(char *)); 1094 linux_osd_jail_register(); 1095 stclohz = (stathz ? 
stathz : hz); 1096 if (bootverbose) 1097 printf("Linux ELF exec handler installed\n"); 1098 } else 1099 printf("cannot insert Linux ELF brand handler\n"); 1100 break; 1101 case MOD_UNLOAD: 1102 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1103 ++brandinfo) 1104 if (elf32_brand_inuse(*brandinfo)) 1105 error = EBUSY; 1106 if (error == 0) { 1107 for (brandinfo = &linux_brandlist[0]; 1108 *brandinfo != NULL; ++brandinfo) 1109 if (elf32_remove_brand_entry(*brandinfo) < 0) 1110 error = EINVAL; 1111 } 1112 if (error == 0) { 1113 SET_FOREACH(lihp, linux_ioctl_handler_set) 1114 linux_ioctl_unregister_handler(*lihp); 1115 SET_FOREACH(ldhp, linux_device_handler_set) 1116 linux_device_unregister_handler(*ldhp); 1117 mtx_destroy(&emul_lock); 1118 sx_destroy(&emul_shared_lock); 1119 mtx_destroy(&futex_mtx); 1120 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1121 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 1122 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1123 linux_osd_jail_deregister(); 1124 if (bootverbose) 1125 printf("Linux ELF exec handler removed\n"); 1126 } else 1127 printf("Could not deinstall ELF interpreter entry\n"); 1128 break; 1129 default: 1130 return EOPNOTSUPP; 1131 } 1132 return error; 1133} 1134 1135static moduledata_t linux_elf_mod = { 1136 "linuxelf", 1137 linux_elf_modevent, 1138 0 1139}; 1140 1141DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1142