linux_sysvec.c revision 161310
1/*- 2 * Copyright (c) 1994-1996 Søren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 161310 2006-08-15 12:54:30Z netchild $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/exec.h> 35#include <sys/imgact.h> 36#include <sys/imgact_aout.h> 37#include <sys/imgact_elf.h> 38#include <sys/kernel.h> 39#include <sys/lock.h> 40#include <sys/malloc.h> 41#include <sys/module.h> 42#include <sys/mutex.h> 43#include <sys/proc.h> 44#include <sys/signalvar.h> 45#include <sys/syscallsubr.h> 46#include <sys/sysent.h> 47#include <sys/sysproto.h> 48#include <sys/vnode.h> 49#include <sys/eventhandler.h> 50 51#include <vm/vm.h> 52#include <vm/pmap.h> 53#include <vm/vm_extern.h> 54#include <vm/vm_map.h> 55#include <vm/vm_object.h> 56#include <vm/vm_page.h> 57#include <vm/vm_param.h> 58 59#include <machine/cpu.h> 60#include <machine/md_var.h> 61#include <machine/pcb.h> 62 63#include <i386/linux/linux.h> 64#include <i386/linux/linux_proto.h> 65#include <compat/linux/linux_mib.h> 66#include <compat/linux/linux_signal.h> 67#include <compat/linux/linux_util.h> 68 69MODULE_VERSION(linux, 1); 70 71MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 72 73#if BYTE_ORDER == LITTLE_ENDIAN 74#define SHELLMAGIC 0x2123 /* #! */ 75#else 76#define SHELLMAGIC 0x2321 77#endif 78 79/* 80 * Allow the sendsig functions to use the ldebug() facility 81 * even though they are not syscalls themselves. Map them 82 * to syscall 0. This is slightly less bogus than using 83 * ldebug(sigreturn). 
84 */ 85#define LINUX_SYS_linux_rt_sendsig 0 86#define LINUX_SYS_linux_sendsig 0 87 88#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 89#define __LINUX_NPXCW__ 0x37f 90 91extern char linux_sigcode[]; 92extern int linux_szsigcode; 93 94extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 95 96SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 97SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 98 99static int linux_fixup(register_t **stack_base, 100 struct image_params *iparams); 101static int elf_linux_fixup(register_t **stack_base, 102 struct image_params *iparams); 103static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 104 caddr_t *params); 105static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 106static void exec_linux_setregs(struct thread *td, u_long entry, 107 u_long stack, u_long ps_strings); 108 109extern void linux_proc_exit(void *, struct proc *, struct image_params *); 110extern void linux_proc_exec(void *, struct proc *, struct image_params *); 111extern void linux_schedtail(void *, struct proc *); 112extern LIST_HEAD(futex_list, futex) futex_list; 113extern struct sx emul_shared_lock; 114extern struct sx emul_lock; 115extern struct mtx futex_mtx; 116 117static eventhandler_tag linux_exit_tag; 118static eventhandler_tag linux_schedtail_tag; 119static eventhandler_tag linux_exec_tag; 120 121/* 122 * Linux syscalls return negative errno's, we do positive and map them 123 * Reference: 124 * FreeBSD: src/sys/sys/errno.h 125 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 126 * linux-2.6.17.8/include/asm-generic/errno.h 127 */ 128static int bsd_to_linux_errno[ELAST + 1] = { 129 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 130 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 131 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 132 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 133 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 134 
-100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 135 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 136 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 137 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 138 -72, -67, -71 139}; 140 141int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 142 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 143 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 144 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 145 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 146 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 147 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 148 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 149 0, LINUX_SIGUSR1, LINUX_SIGUSR2 150}; 151 152int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 153 SIGHUP, SIGINT, SIGQUIT, SIGILL, 154 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 155 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 156 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 157 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 158 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 159 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 160 SIGIO, SIGURG, SIGSYS 161}; 162 163#define LINUX_T_UNKNOWN 255 164static int _bsd_to_linux_trapcode[] = { 165 LINUX_T_UNKNOWN, /* 0 */ 166 6, /* 1 T_PRIVINFLT */ 167 LINUX_T_UNKNOWN, /* 2 */ 168 3, /* 3 T_BPTFLT */ 169 LINUX_T_UNKNOWN, /* 4 */ 170 LINUX_T_UNKNOWN, /* 5 */ 171 16, /* 6 T_ARITHTRAP */ 172 254, /* 7 T_ASTFLT */ 173 LINUX_T_UNKNOWN, /* 8 */ 174 13, /* 9 T_PROTFLT */ 175 1, /* 10 T_TRCTRAP */ 176 LINUX_T_UNKNOWN, /* 11 */ 177 14, /* 12 T_PAGEFLT */ 178 LINUX_T_UNKNOWN, /* 13 */ 179 17, /* 14 T_ALIGNFLT */ 180 LINUX_T_UNKNOWN, /* 15 */ 181 LINUX_T_UNKNOWN, /* 16 */ 182 LINUX_T_UNKNOWN, /* 17 */ 183 0, /* 18 T_DIVIDE */ 184 2, /* 19 T_NMI */ 185 4, /* 20 T_OFLOW */ 186 5, /* 21 T_BOUND */ 187 7, /* 22 T_DNA */ 188 8, /* 23 T_DOUBLEFLT */ 189 9, /* 24 T_FPOPFLT */ 190 10, /* 25 T_TSSFLT */ 191 11, /* 26 T_SEGNPFLT */ 192 12, /* 27 T_STKFLT */ 193 18, /* 28 T_MCHK */ 194 19, /* 29 T_XMMFLT */ 
195 15 /* 30 T_RESERVED */ 196}; 197#define bsd_to_linux_trapcode(code) \ 198 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 199 _bsd_to_linux_trapcode[(code)]: \ 200 LINUX_T_UNKNOWN) 201 202/* 203 * If FreeBSD & Linux have a difference of opinion about what a trap 204 * means, deal with it here. 205 * 206 * MPSAFE 207 */ 208static int 209translate_traps(int signal, int trap_code) 210{ 211 if (signal != SIGBUS) 212 return signal; 213 switch (trap_code) { 214 case T_PROTFLT: 215 case T_TSSFLT: 216 case T_DOUBLEFLT: 217 case T_PAGEFLT: 218 return SIGSEGV; 219 default: 220 return signal; 221 } 222} 223 224static int 225linux_fixup(register_t **stack_base, struct image_params *imgp) 226{ 227 register_t *argv, *envp; 228 229 argv = *stack_base; 230 envp = *stack_base + (imgp->args->argc + 1); 231 (*stack_base)--; 232 **stack_base = (intptr_t)(void *)envp; 233 (*stack_base)--; 234 **stack_base = (intptr_t)(void *)argv; 235 (*stack_base)--; 236 **stack_base = imgp->args->argc; 237 return 0; 238} 239 240static int 241elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 242{ 243 Elf32_Auxargs *args; 244 register_t *pos; 245 246 KASSERT(curthread->td_proc == imgp->proc && 247 (curthread->td_proc->p_flag & P_SA) == 0, 248 ("unsafe elf_linux_fixup(), should be curproc")); 249 args = (Elf32_Auxargs *)imgp->auxargs; 250 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 251 252 if (args->trace) 253 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 254 if (args->execfd != -1) 255 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 256 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 257 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 258 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 259 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 260 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 261 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 262 AUXARGS_ENTRY(pos, AT_BASE, args->base); 263 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 264 AUXARGS_ENTRY(pos, AT_EUID, 
imgp->proc->p_ucred->cr_svuid); 265 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 266 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 267 AUXARGS_ENTRY(pos, AT_NULL, 0); 268 269 free(imgp->auxargs, M_TEMP); 270 imgp->auxargs = NULL; 271 272 (*stack_base)--; 273 **stack_base = (register_t)imgp->args->argc; 274 return 0; 275} 276 277extern int _ucodesel, _udatasel; 278extern unsigned long linux_sznonrtsigcode; 279 280static void 281linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 282{ 283 struct thread *td = curthread; 284 struct proc *p = td->td_proc; 285 struct sigacts *psp; 286 struct trapframe *regs; 287 struct l_rt_sigframe *fp, frame; 288 int sig, code; 289 int oonstack; 290 291 sig = ksi->ksi_signo; 292 code = ksi->ksi_code; 293 PROC_LOCK_ASSERT(p, MA_OWNED); 294 psp = p->p_sigacts; 295 mtx_assert(&psp->ps_mtx, MA_OWNED); 296 regs = td->td_frame; 297 oonstack = sigonstack(regs->tf_esp); 298 299#ifdef DEBUG 300 if (ldebug(rt_sendsig)) 301 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 302 catcher, sig, (void*)mask, code); 303#endif 304 /* 305 * Allocate space for the signal handler context. 306 */ 307 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 308 SIGISMEMBER(psp->ps_sigonstack, sig)) { 309 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 310 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 311 } else 312 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 313 mtx_unlock(&psp->ps_mtx); 314 315 /* 316 * Build the argument list for the signal handler. 
317 */ 318 if (p->p_sysent->sv_sigtbl) 319 if (sig <= p->p_sysent->sv_sigsize) 320 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 321 322 bzero(&frame, sizeof(frame)); 323 324 frame.sf_handler = catcher; 325 frame.sf_sig = sig; 326 frame.sf_siginfo = &fp->sf_si; 327 frame.sf_ucontext = &fp->sf_sc; 328 329 /* Fill in POSIX parts */ 330 frame.sf_si.lsi_signo = sig; 331 frame.sf_si.lsi_code = code; 332 frame.sf_si.lsi_addr = ksi->ksi_addr; 333 334 /* 335 * Build the signal context to be used by sigreturn. 336 */ 337 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 338 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 339 340 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 341 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 342 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 343 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 344 PROC_UNLOCK(p); 345 346 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 347 348 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 349 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 350 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 351 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 352 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 353 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 354 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 355 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 356 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 357 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 358 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 359 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 360 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 361 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 362 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 363 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 364 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 365 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 366 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 367 368#ifdef DEBUG 369 if (ldebug(rt_sendsig)) 370 
printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 371 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 372 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 373#endif 374 375 if (copyout(&frame, fp, sizeof(frame)) != 0) { 376 /* 377 * Process has trashed its stack; give it an illegal 378 * instruction to halt it in its tracks. 379 */ 380#ifdef DEBUG 381 if (ldebug(rt_sendsig)) 382 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 383 fp, oonstack); 384#endif 385 PROC_LOCK(p); 386 sigexit(td, SIGILL); 387 } 388 389 /* 390 * Build context to run handler in. 391 */ 392 regs->tf_esp = (int)fp; 393 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 394 linux_sznonrtsigcode; 395 regs->tf_eflags &= ~(PSL_T | PSL_VM); 396 regs->tf_cs = _ucodesel; 397 regs->tf_ds = _udatasel; 398 regs->tf_es = _udatasel; 399 regs->tf_fs = _udatasel; 400 regs->tf_ss = _udatasel; 401 PROC_LOCK(p); 402 mtx_lock(&psp->ps_mtx); 403} 404 405 406/* 407 * Send an interrupt to process. 408 * 409 * Stack is set up to allow sigcode stored 410 * in u. to call routine, followed by kcall 411 * to sigreturn routine below. After sigreturn 412 * resets the signal mask, the stack, and the 413 * frame pointer, it returns to the user 414 * specified pc, psl. 415 */ 416static void 417linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 418{ 419 struct thread *td = curthread; 420 struct proc *p = td->td_proc; 421 struct sigacts *psp; 422 struct trapframe *regs; 423 struct l_sigframe *fp, frame; 424 l_sigset_t lmask; 425 int sig, code; 426 int oonstack, i; 427 428 PROC_LOCK_ASSERT(p, MA_OWNED); 429 psp = p->p_sigacts; 430 sig = ksi->ksi_signo; 431 code = ksi->ksi_code; 432 mtx_assert(&psp->ps_mtx, MA_OWNED); 433 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 434 /* Signal handler installed with SA_SIGINFO. 
*/ 435 linux_rt_sendsig(catcher, ksi, mask); 436 return; 437 } 438 regs = td->td_frame; 439 oonstack = sigonstack(regs->tf_esp); 440 441#ifdef DEBUG 442 if (ldebug(sendsig)) 443 printf(ARGS(sendsig, "%p, %d, %p, %u"), 444 catcher, sig, (void*)mask, code); 445#endif 446 447 /* 448 * Allocate space for the signal handler context. 449 */ 450 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 451 SIGISMEMBER(psp->ps_sigonstack, sig)) { 452 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 453 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 454 } else 455 fp = (struct l_sigframe *)regs->tf_esp - 1; 456 mtx_unlock(&psp->ps_mtx); 457 PROC_UNLOCK(p); 458 459 /* 460 * Build the argument list for the signal handler. 461 */ 462 if (p->p_sysent->sv_sigtbl) 463 if (sig <= p->p_sysent->sv_sigsize) 464 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 465 466 bzero(&frame, sizeof(frame)); 467 468 frame.sf_handler = catcher; 469 frame.sf_sig = sig; 470 471 bsd_to_linux_sigset(mask, &lmask); 472 473 /* 474 * Build the signal context to be used by sigreturn. 
475 */ 476 frame.sf_sc.sc_mask = lmask.__bits[0]; 477 frame.sf_sc.sc_gs = rgs(); 478 frame.sf_sc.sc_fs = regs->tf_fs; 479 frame.sf_sc.sc_es = regs->tf_es; 480 frame.sf_sc.sc_ds = regs->tf_ds; 481 frame.sf_sc.sc_edi = regs->tf_edi; 482 frame.sf_sc.sc_esi = regs->tf_esi; 483 frame.sf_sc.sc_ebp = regs->tf_ebp; 484 frame.sf_sc.sc_ebx = regs->tf_ebx; 485 frame.sf_sc.sc_edx = regs->tf_edx; 486 frame.sf_sc.sc_ecx = regs->tf_ecx; 487 frame.sf_sc.sc_eax = regs->tf_eax; 488 frame.sf_sc.sc_eip = regs->tf_eip; 489 frame.sf_sc.sc_cs = regs->tf_cs; 490 frame.sf_sc.sc_eflags = regs->tf_eflags; 491 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 492 frame.sf_sc.sc_ss = regs->tf_ss; 493 frame.sf_sc.sc_err = regs->tf_err; 494 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 495 496 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 497 frame.sf_extramask[i] = lmask.__bits[i+1]; 498 499 if (copyout(&frame, fp, sizeof(frame)) != 0) { 500 /* 501 * Process has trashed its stack; give it an illegal 502 * instruction to halt it in its tracks. 503 */ 504 PROC_LOCK(p); 505 sigexit(td, SIGILL); 506 } 507 508 /* 509 * Build context to run handler in. 510 */ 511 regs->tf_esp = (int)fp; 512 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 513 regs->tf_eflags &= ~(PSL_T | PSL_VM); 514 regs->tf_cs = _ucodesel; 515 regs->tf_ds = _udatasel; 516 regs->tf_es = _udatasel; 517 regs->tf_fs = _udatasel; 518 regs->tf_ss = _udatasel; 519 PROC_LOCK(p); 520 mtx_lock(&psp->ps_mtx); 521} 522 523/* 524 * System call to cleanup state after a signal 525 * has been taken. Reset signal mask and 526 * stack state from context left by sendsig (above). 527 * Return to previous pc and psl as specified by 528 * context left by sendsig. Check carefully to 529 * make sure that the user has not modified the 530 * psl to gain improper privileges or to cause 531 * a machine fault. 
532 */ 533int 534linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 535{ 536 struct proc *p = td->td_proc; 537 struct l_sigframe frame; 538 struct trapframe *regs; 539 l_sigset_t lmask; 540 int eflags, i; 541 ksiginfo_t ksi; 542 543 regs = td->td_frame; 544 545#ifdef DEBUG 546 if (ldebug(sigreturn)) 547 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 548#endif 549 /* 550 * The trampoline code hands us the sigframe. 551 * It is unsafe to keep track of it ourselves, in the event that a 552 * program jumps out of a signal handler. 553 */ 554 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 555 return (EFAULT); 556 557 /* 558 * Check for security violations. 559 */ 560#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 561 eflags = frame.sf_sc.sc_eflags; 562 /* 563 * XXX do allow users to change the privileged flag PSL_RF. The 564 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 565 * sometimes set it there too. tf_eflags is kept in the signal 566 * context during signal handling and there is no other place 567 * to remember it, so the PSL_RF bit may be corrupted by the 568 * signal handler without us knowing. Corruption of the PSL_RF 569 * bit at worst causes one more or one less debugger trap, so 570 * allowing it is fairly harmless. 571 */ 572 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 573 return(EINVAL); 574 575 /* 576 * Don't allow users to load a valid privileged %cs. Let the 577 * hardware check for invalid selectors, excess privilege in 578 * other selectors, invalid %eip's and invalid %esp's. 
579 */ 580#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 581 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 582 ksiginfo_init_trap(&ksi); 583 ksi.ksi_signo = SIGBUS; 584 ksi.ksi_code = BUS_OBJERR; 585 ksi.ksi_trapno = T_PROTFLT; 586 ksi.ksi_addr = (void *)regs->tf_eip; 587 trapsignal(td, &ksi); 588 return(EINVAL); 589 } 590 591 lmask.__bits[0] = frame.sf_sc.sc_mask; 592 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 593 lmask.__bits[i+1] = frame.sf_extramask[i]; 594 PROC_LOCK(p); 595 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 596 SIG_CANTMASK(td->td_sigmask); 597 signotify(td); 598 PROC_UNLOCK(p); 599 600 /* 601 * Restore signal context. 602 */ 603 /* %gs was restored by the trampoline. */ 604 regs->tf_fs = frame.sf_sc.sc_fs; 605 regs->tf_es = frame.sf_sc.sc_es; 606 regs->tf_ds = frame.sf_sc.sc_ds; 607 regs->tf_edi = frame.sf_sc.sc_edi; 608 regs->tf_esi = frame.sf_sc.sc_esi; 609 regs->tf_ebp = frame.sf_sc.sc_ebp; 610 regs->tf_ebx = frame.sf_sc.sc_ebx; 611 regs->tf_edx = frame.sf_sc.sc_edx; 612 regs->tf_ecx = frame.sf_sc.sc_ecx; 613 regs->tf_eax = frame.sf_sc.sc_eax; 614 regs->tf_eip = frame.sf_sc.sc_eip; 615 regs->tf_cs = frame.sf_sc.sc_cs; 616 regs->tf_eflags = eflags; 617 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 618 regs->tf_ss = frame.sf_sc.sc_ss; 619 620 return (EJUSTRETURN); 621} 622 623/* 624 * System call to cleanup state after a signal 625 * has been taken. Reset signal mask and 626 * stack state from context left by rt_sendsig (above). 627 * Return to previous pc and psl as specified by 628 * context left by sendsig. Check carefully to 629 * make sure that the user has not modified the 630 * psl to gain improper privileges or to cause 631 * a machine fault. 
632 */ 633int 634linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 635{ 636 struct proc *p = td->td_proc; 637 struct l_ucontext uc; 638 struct l_sigcontext *context; 639 l_stack_t *lss; 640 stack_t ss; 641 struct trapframe *regs; 642 int eflags; 643 ksiginfo_t ksi; 644 645 regs = td->td_frame; 646 647#ifdef DEBUG 648 if (ldebug(rt_sigreturn)) 649 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 650#endif 651 /* 652 * The trampoline code hands us the ucontext. 653 * It is unsafe to keep track of it ourselves, in the event that a 654 * program jumps out of a signal handler. 655 */ 656 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 657 return (EFAULT); 658 659 context = &uc.uc_mcontext; 660 661 /* 662 * Check for security violations. 663 */ 664#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 665 eflags = context->sc_eflags; 666 /* 667 * XXX do allow users to change the privileged flag PSL_RF. The 668 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 669 * sometimes set it there too. tf_eflags is kept in the signal 670 * context during signal handling and there is no other place 671 * to remember it, so the PSL_RF bit may be corrupted by the 672 * signal handler without us knowing. Corruption of the PSL_RF 673 * bit at worst causes one more or one less debugger trap, so 674 * allowing it is fairly harmless. 675 */ 676 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 677 return(EINVAL); 678 679 /* 680 * Don't allow users to load a valid privileged %cs. Let the 681 * hardware check for invalid selectors, excess privilege in 682 * other selectors, invalid %eip's and invalid %esp's. 
683 */ 684#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 685 if (!CS_SECURE(context->sc_cs)) { 686 ksiginfo_init_trap(&ksi); 687 ksi.ksi_signo = SIGBUS; 688 ksi.ksi_code = BUS_OBJERR; 689 ksi.ksi_trapno = T_PROTFLT; 690 ksi.ksi_addr = (void *)regs->tf_eip; 691 trapsignal(td, &ksi); 692 return(EINVAL); 693 } 694 695 PROC_LOCK(p); 696 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 697 SIG_CANTMASK(td->td_sigmask); 698 signotify(td); 699 PROC_UNLOCK(p); 700 701 /* 702 * Restore signal context 703 */ 704 /* %gs was restored by the trampoline. */ 705 regs->tf_fs = context->sc_fs; 706 regs->tf_es = context->sc_es; 707 regs->tf_ds = context->sc_ds; 708 regs->tf_edi = context->sc_edi; 709 regs->tf_esi = context->sc_esi; 710 regs->tf_ebp = context->sc_ebp; 711 regs->tf_ebx = context->sc_ebx; 712 regs->tf_edx = context->sc_edx; 713 regs->tf_ecx = context->sc_ecx; 714 regs->tf_eax = context->sc_eax; 715 regs->tf_eip = context->sc_eip; 716 regs->tf_cs = context->sc_cs; 717 regs->tf_eflags = eflags; 718 regs->tf_esp = context->sc_esp_at_signal; 719 regs->tf_ss = context->sc_ss; 720 721 /* 722 * call sigaltstack & ignore results.. 723 */ 724 lss = &uc.uc_stack; 725 ss.ss_sp = lss->ss_sp; 726 ss.ss_size = lss->ss_size; 727 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 728 729#ifdef DEBUG 730 if (ldebug(rt_sigreturn)) 731 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 732 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 733#endif 734 (void)kern_sigaltstack(td, &ss, NULL); 735 736 return (EJUSTRETURN); 737} 738 739/* 740 * MPSAFE 741 */ 742static void 743linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 744{ 745 args[0] = tf->tf_ebx; 746 args[1] = tf->tf_ecx; 747 args[2] = tf->tf_edx; 748 args[3] = tf->tf_esi; 749 args[4] = tf->tf_edi; 750 args[5] = tf->tf_ebp; /* Unconfirmed */ 751 *params = NULL; /* no copyin */ 752} 753 754/* 755 * If a linux binary is exec'ing something, try this image activator 756 * first. 
We override standard shell script execution in order to 757 * be able to modify the interpreter path. We only do this if a linux 758 * binary is doing the exec, so we do not create an EXEC module for it. 759 */ 760static int exec_linux_imgact_try(struct image_params *iparams); 761 762static int 763exec_linux_imgact_try(struct image_params *imgp) 764{ 765 const char *head = (const char *)imgp->image_header; 766 char *rpath; 767 int error = -1, len; 768 769 /* 770 * The interpreter for shell scripts run from a linux binary needs 771 * to be located in /compat/linux if possible in order to recursively 772 * maintain linux path emulation. 773 */ 774 if (((const short *)head)[0] == SHELLMAGIC) { 775 /* 776 * Run our normal shell image activator. If it succeeds attempt 777 * to use the alternate path for the interpreter. If an alternate 778 * path is found, use our stringspace to store it. 779 */ 780 if ((error = exec_shell_imgact(imgp)) == 0) { 781 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 782 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 783 if (rpath != NULL) { 784 len = strlen(rpath) + 1; 785 786 if (len <= MAXSHELLCMDLEN) { 787 memcpy(imgp->interpreter_name, rpath, len); 788 } 789 free(rpath, M_TEMP); 790 } 791 } 792 } 793 return(error); 794} 795 796/* 797 * exec_setregs may initialize some registers differently than Linux 798 * does, thus potentially confusing Linux binaries. If necessary, we 799 * override the exec_setregs default(s) here. 800 */ 801static void 802exec_linux_setregs(struct thread *td, u_long entry, 803 u_long stack, u_long ps_strings) 804{ 805 static const u_short control = __LINUX_NPXCW__; 806 struct pcb *pcb = td->td_pcb; 807 808 exec_setregs(td, entry, stack, ps_strings); 809 810 /* Linux sets %gs to 0, we default to _udatasel */ 811 pcb->pcb_gs = 0; load_gs(0); 812 813 /* Linux sets the i387 to extended precision. 
*/ 814 fldcw(&control); 815} 816 817struct sysentvec linux_sysvec = { 818 LINUX_SYS_MAXSYSCALL, 819 linux_sysent, 820 0, 821 LINUX_SIGTBLSZ, 822 bsd_to_linux_signal, 823 ELAST + 1, 824 bsd_to_linux_errno, 825 translate_traps, 826 linux_fixup, 827 linux_sendsig, 828 linux_sigcode, 829 &linux_szsigcode, 830 linux_prepsyscall, 831 "Linux a.out", 832 NULL, 833 exec_linux_imgact_try, 834 LINUX_MINSIGSTKSZ, 835 PAGE_SIZE, 836 VM_MIN_ADDRESS, 837 VM_MAXUSER_ADDRESS, 838 USRSTACK, 839 PS_STRINGS, 840 VM_PROT_ALL, 841 exec_copyout_strings, 842 exec_linux_setregs, 843 NULL 844}; 845 846struct sysentvec elf_linux_sysvec = { 847 LINUX_SYS_MAXSYSCALL, 848 linux_sysent, 849 0, 850 LINUX_SIGTBLSZ, 851 bsd_to_linux_signal, 852 ELAST + 1, 853 bsd_to_linux_errno, 854 translate_traps, 855 elf_linux_fixup, 856 linux_sendsig, 857 linux_sigcode, 858 &linux_szsigcode, 859 linux_prepsyscall, 860 "Linux ELF", 861 elf32_coredump, 862 exec_linux_imgact_try, 863 LINUX_MINSIGSTKSZ, 864 PAGE_SIZE, 865 VM_MIN_ADDRESS, 866 VM_MAXUSER_ADDRESS, 867 USRSTACK, 868 PS_STRINGS, 869 VM_PROT_ALL, 870 exec_copyout_strings, 871 exec_linux_setregs, 872 NULL 873}; 874 875static Elf32_Brandinfo linux_brand = { 876 ELFOSABI_LINUX, 877 EM_386, 878 "Linux", 879 "/compat/linux", 880 "/lib/ld-linux.so.1", 881 &elf_linux_sysvec, 882 NULL, 883 BI_CAN_EXEC_DYN, 884 }; 885 886static Elf32_Brandinfo linux_glibc2brand = { 887 ELFOSABI_LINUX, 888 EM_386, 889 "Linux", 890 "/compat/linux", 891 "/lib/ld-linux.so.2", 892 &elf_linux_sysvec, 893 NULL, 894 BI_CAN_EXEC_DYN, 895 }; 896 897Elf32_Brandinfo *linux_brandlist[] = { 898 &linux_brand, 899 &linux_glibc2brand, 900 NULL 901 }; 902 903static int 904linux_elf_modevent(module_t mod, int type, void *data) 905{ 906 Elf32_Brandinfo **brandinfo; 907 int error; 908 struct linux_ioctl_handler **lihp; 909 struct linux_device_handler **ldhp; 910 911 error = 0; 912 913 switch(type) { 914 case MOD_LOAD: 915 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 916 ++brandinfo) 917 
if (elf32_insert_brand_entry(*brandinfo) < 0) 918 error = EINVAL; 919 if (error == 0) { 920 SET_FOREACH(lihp, linux_ioctl_handler_set) 921 linux_ioctl_register_handler(*lihp); 922 SET_FOREACH(ldhp, linux_device_handler_set) 923 linux_device_register_handler(*ldhp); 924 sx_init(&emul_lock, "emuldata lock"); 925 sx_init(&emul_shared_lock, "emuldata->shared lock"); 926 LIST_INIT(&futex_list); 927 mtx_init(&futex_mtx, "futex protection lock", NULL, MTX_DEF); 928 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 929 NULL, 1000); 930 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail, 931 NULL, 1000); 932 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 933 NULL, 1000); 934 if (bootverbose) 935 printf("Linux ELF exec handler installed\n"); 936 } else 937 printf("cannot insert Linux ELF brand handler\n"); 938 break; 939 case MOD_UNLOAD: 940 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 941 ++brandinfo) 942 if (elf32_brand_inuse(*brandinfo)) 943 error = EBUSY; 944 if (error == 0) { 945 for (brandinfo = &linux_brandlist[0]; 946 *brandinfo != NULL; ++brandinfo) 947 if (elf32_remove_brand_entry(*brandinfo) < 0) 948 error = EINVAL; 949 } 950 if (error == 0) { 951 SET_FOREACH(lihp, linux_ioctl_handler_set) 952 linux_ioctl_unregister_handler(*lihp); 953 SET_FOREACH(ldhp, linux_device_handler_set) 954 linux_device_unregister_handler(*ldhp); 955 sx_destroy(&emul_lock); 956 sx_destroy(&emul_shared_lock); 957 mtx_destroy(&futex_mtx); 958 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 959 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 960 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 961 if (bootverbose) 962 printf("Linux ELF exec handler removed\n"); 963 } else 964 printf("Could not deinstall ELF interpreter entry\n"); 965 break; 966 default: 967 return EOPNOTSUPP; 968 } 969 return error; 970} 971 972static moduledata_t linux_elf_mod = { 973 "linuxelf", 974 linux_elf_modevent, 975 0 
976}; 977 978DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 979