linux_sysvec.c revision 112682
1/*- 2 * Copyright (c) 1994-1996 Søren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 112682 2003-03-26 18:29:44Z jhb $ 29 */ 30 31/* XXX we use functions that might not exist. */ 32#include "opt_compat.h" 33 34#ifndef COMPAT_43 35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 
36#endif 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/imgact.h> 41#include <sys/imgact_aout.h> 42#include <sys/imgact_elf.h> 43#include <sys/lock.h> 44#include <sys/malloc.h> 45#include <sys/mutex.h> 46#include <sys/proc.h> 47#include <sys/signalvar.h> 48#include <sys/syscallsubr.h> 49#include <sys/sysent.h> 50#include <sys/sysproto.h> 51#include <sys/user.h> 52#include <sys/vnode.h> 53 54#include <vm/vm.h> 55#include <vm/vm_param.h> 56#include <vm/vm_page.h> 57#include <vm/vm_extern.h> 58#include <sys/exec.h> 59#include <sys/kernel.h> 60#include <sys/module.h> 61#include <machine/cpu.h> 62#include <machine/md_var.h> 63#include <sys/mutex.h> 64 65#include <vm/vm.h> 66#include <vm/vm_param.h> 67#include <vm/pmap.h> 68#include <vm/vm_map.h> 69#include <vm/vm_object.h> 70 71#include <i386/linux/linux.h> 72#include <i386/linux/linux_proto.h> 73#include <compat/linux/linux_signal.h> 74#include <compat/linux/linux_util.h> 75 76MODULE_VERSION(linux, 1); 77MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 78MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 79MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 80 81MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 82 83#if BYTE_ORDER == LITTLE_ENDIAN 84#define SHELLMAGIC 0x2123 /* #! */ 85#else 86#define SHELLMAGIC 0x2321 87#endif 88 89/* 90 * Allow the sendsig functions to use the ldebug() facility 91 * even though they are not syscalls themselves. Map them 92 * to syscall 0. This is slightly less bogus than using 93 * ldebug(sigreturn). 
94 */ 95#define LINUX_SYS_linux_rt_sendsig 0 96#define LINUX_SYS_linux_sendsig 0 97 98extern char linux_sigcode[]; 99extern int linux_szsigcode; 100 101extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 102 103SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 104 105static int linux_fixup(register_t **stack_base, 106 struct image_params *iparams); 107static int elf_linux_fixup(register_t **stack_base, 108 struct image_params *iparams); 109static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 110 caddr_t *params); 111static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 112 u_long code); 113 114/* 115 * Linux syscalls return negative errno's, we do positive and map them 116 */ 117static int bsd_to_linux_errno[ELAST + 1] = { 118 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 119 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 120 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 121 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 122 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 123 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 124 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 125 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 126 -6, -6, -43, -42, -75, -6, -84 127}; 128 129int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 130 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 131 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 132 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 133 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 134 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 135 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 136 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 137 0, LINUX_SIGUSR1, LINUX_SIGUSR2 138}; 139 140int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 141 SIGHUP, SIGINT, SIGQUIT, SIGILL, 142 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 143 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 144 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 145 SIGCHLD, SIGCONT, 
SIGSTOP, SIGTSTP, 146 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 147 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 148 SIGIO, SIGURG, SIGSYS 149}; 150 151#define LINUX_T_UNKNOWN 255 152static int _bsd_to_linux_trapcode[] = { 153 LINUX_T_UNKNOWN, /* 0 */ 154 6, /* 1 T_PRIVINFLT */ 155 LINUX_T_UNKNOWN, /* 2 */ 156 3, /* 3 T_BPTFLT */ 157 LINUX_T_UNKNOWN, /* 4 */ 158 LINUX_T_UNKNOWN, /* 5 */ 159 16, /* 6 T_ARITHTRAP */ 160 254, /* 7 T_ASTFLT */ 161 LINUX_T_UNKNOWN, /* 8 */ 162 13, /* 9 T_PROTFLT */ 163 1, /* 10 T_TRCTRAP */ 164 LINUX_T_UNKNOWN, /* 11 */ 165 14, /* 12 T_PAGEFLT */ 166 LINUX_T_UNKNOWN, /* 13 */ 167 17, /* 14 T_ALIGNFLT */ 168 LINUX_T_UNKNOWN, /* 15 */ 169 LINUX_T_UNKNOWN, /* 16 */ 170 LINUX_T_UNKNOWN, /* 17 */ 171 0, /* 18 T_DIVIDE */ 172 2, /* 19 T_NMI */ 173 4, /* 20 T_OFLOW */ 174 5, /* 21 T_BOUND */ 175 7, /* 22 T_DNA */ 176 8, /* 23 T_DOUBLEFLT */ 177 9, /* 24 T_FPOPFLT */ 178 10, /* 25 T_TSSFLT */ 179 11, /* 26 T_SEGNPFLT */ 180 12, /* 27 T_STKFLT */ 181 18, /* 28 T_MCHK */ 182 19, /* 29 T_XMMFLT */ 183 15 /* 30 T_RESERVED */ 184}; 185#define bsd_to_linux_trapcode(code) \ 186 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 187 _bsd_to_linux_trapcode[(code)]: \ 188 LINUX_T_UNKNOWN) 189 190/* 191 * If FreeBSD & Linux have a difference of opinion about what a trap 192 * means, deal with it here. 
193 * 194 * MPSAFE 195 */ 196static int 197translate_traps(int signal, int trap_code) 198{ 199 if (signal != SIGBUS) 200 return signal; 201 switch (trap_code) { 202 case T_PROTFLT: 203 case T_TSSFLT: 204 case T_DOUBLEFLT: 205 case T_PAGEFLT: 206 return SIGSEGV; 207 default: 208 return signal; 209 } 210} 211 212static int 213linux_fixup(register_t **stack_base, struct image_params *imgp) 214{ 215 register_t *argv, *envp; 216 217 argv = *stack_base; 218 envp = *stack_base + (imgp->argc + 1); 219 (*stack_base)--; 220 **stack_base = (intptr_t)(void *)envp; 221 (*stack_base)--; 222 **stack_base = (intptr_t)(void *)argv; 223 (*stack_base)--; 224 **stack_base = imgp->argc; 225 return 0; 226} 227 228static int 229elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 230{ 231 Elf32_Auxargs *args; 232 register_t *pos; 233 234 KASSERT(curthread->td_proc == imgp->proc && 235 (curthread->td_proc->p_flag & P_THREADED) == 0, 236 ("unsafe elf_linux_fixup(), should be curproc")); 237 args = (Elf32_Auxargs *)imgp->auxargs; 238 pos = *stack_base + (imgp->argc + imgp->envc + 2); 239 240 if (args->trace) 241 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 242 if (args->execfd != -1) 243 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 244 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 245 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 246 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 247 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 248 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 249 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 250 AUXARGS_ENTRY(pos, AT_BASE, args->base); 251 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 252 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 253 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 254 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 255 AUXARGS_ENTRY(pos, AT_NULL, 0); 256 257 free(imgp->auxargs, M_TEMP); 258 imgp->auxargs = NULL; 259 260 (*stack_base)--; 261 **stack_base = (register_t)imgp->argc; 262 return 0; 263} 264 
265extern int _ucodesel, _udatasel; 266extern unsigned long linux_sznonrtsigcode; 267 268static void 269linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 270{ 271 struct thread *td = curthread; 272 struct proc *p = td->td_proc; 273 struct trapframe *regs; 274 struct l_rt_sigframe *fp, frame; 275 int oonstack; 276 277 PROC_LOCK_ASSERT(p, MA_OWNED); 278 regs = td->td_frame; 279 oonstack = sigonstack(regs->tf_esp); 280 281#ifdef DEBUG 282 if (ldebug(rt_sendsig)) 283 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 284 catcher, sig, (void*)mask, code); 285#endif 286 /* 287 * Allocate space for the signal handler context. 288 */ 289 if ((p->p_flag & P_ALTSTACK) && !oonstack && 290 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) { 291 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp + 292 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 293 } else 294 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 295 PROC_UNLOCK(p); 296 297 /* 298 * Build the argument list for the signal handler. 299 */ 300 if (p->p_sysent->sv_sigtbl) 301 if (sig <= p->p_sysent->sv_sigsize) 302 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 303 304 bzero(&frame, sizeof(frame)); 305 306 frame.sf_handler = catcher; 307 frame.sf_sig = sig; 308 frame.sf_siginfo = &fp->sf_si; 309 frame.sf_ucontext = &fp->sf_sc; 310 311 /* Fill in POSIX parts */ 312 frame.sf_si.lsi_signo = sig; 313 frame.sf_si.lsi_code = code; 314 frame.sf_si.lsi_addr = (void *)regs->tf_err; 315 316 /* 317 * Build the signal context to be used by sigreturn. 318 */ 319 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 320 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 321 322 PROC_LOCK(p); 323 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp; 324 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size; 325 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) 326 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 327 PROC_UNLOCK(p); 328 329 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 330 331 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 332 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 333 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 334 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 335 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 336 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 337 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 338 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 339 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 340 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 341 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 342 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 343 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 344 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 345 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 346 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 347 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 348 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 349 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 350 351#ifdef DEBUG 352 if (ldebug(rt_sendsig)) 353 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 354 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp, 355 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 356#endif 357 358 if (copyout(&frame, fp, sizeof(frame)) != 0) { 359 /* 360 * Process has trashed its stack; give it an illegal 361 * instruction to halt it in its tracks. 362 */ 363#ifdef DEBUG 364 if (ldebug(rt_sendsig)) 365 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 366 fp, oonstack); 367#endif 368 PROC_LOCK(p); 369 sigexit(td, SIGILL); 370 } 371 372 /* 373 * Build context to run handler in. 
374 */ 375 regs->tf_esp = (int)fp; 376 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 377 linux_sznonrtsigcode; 378 regs->tf_eflags &= ~(PSL_T | PSL_VM); 379 regs->tf_cs = _ucodesel; 380 regs->tf_ds = _udatasel; 381 regs->tf_es = _udatasel; 382 regs->tf_fs = _udatasel; 383 regs->tf_ss = _udatasel; 384 PROC_LOCK(p); 385} 386 387 388/* 389 * Send an interrupt to process. 390 * 391 * Stack is set up to allow sigcode stored 392 * in u. to call routine, followed by kcall 393 * to sigreturn routine below. After sigreturn 394 * resets the signal mask, the stack, and the 395 * frame pointer, it returns to the user 396 * specified pc, psl. 397 */ 398static void 399linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 400{ 401 struct thread *td = curthread; 402 struct proc *p = td->td_proc; 403 struct trapframe *regs; 404 struct l_sigframe *fp, frame; 405 l_sigset_t lmask; 406 int oonstack, i; 407 408 PROC_LOCK_ASSERT(p, MA_OWNED); 409 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { 410 /* Signal handler installed with SA_SIGINFO. */ 411 linux_rt_sendsig(catcher, sig, mask, code); 412 return; 413 } 414 415 regs = td->td_frame; 416 oonstack = sigonstack(regs->tf_esp); 417 418#ifdef DEBUG 419 if (ldebug(sendsig)) 420 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 421 catcher, sig, (void*)mask, code); 422#endif 423 424 /* 425 * Allocate space for the signal handler context. 426 */ 427 if ((p->p_flag & P_ALTSTACK) && !oonstack && 428 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) { 429 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp + 430 p->p_sigstk.ss_size - sizeof(struct l_sigframe)); 431 } else 432 fp = (struct l_sigframe *)regs->tf_esp - 1; 433 PROC_UNLOCK(p); 434 435 /* 436 * Build the argument list for the signal handler. 
437 */ 438 if (p->p_sysent->sv_sigtbl) 439 if (sig <= p->p_sysent->sv_sigsize) 440 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 441 442 bzero(&frame, sizeof(frame)); 443 444 frame.sf_handler = catcher; 445 frame.sf_sig = sig; 446 447 bsd_to_linux_sigset(mask, &lmask); 448 449 /* 450 * Build the signal context to be used by sigreturn. 451 */ 452 frame.sf_sc.sc_mask = lmask.__bits[0]; 453 frame.sf_sc.sc_gs = rgs(); 454 frame.sf_sc.sc_fs = regs->tf_fs; 455 frame.sf_sc.sc_es = regs->tf_es; 456 frame.sf_sc.sc_ds = regs->tf_ds; 457 frame.sf_sc.sc_edi = regs->tf_edi; 458 frame.sf_sc.sc_esi = regs->tf_esi; 459 frame.sf_sc.sc_ebp = regs->tf_ebp; 460 frame.sf_sc.sc_ebx = regs->tf_ebx; 461 frame.sf_sc.sc_edx = regs->tf_edx; 462 frame.sf_sc.sc_ecx = regs->tf_ecx; 463 frame.sf_sc.sc_eax = regs->tf_eax; 464 frame.sf_sc.sc_eip = regs->tf_eip; 465 frame.sf_sc.sc_cs = regs->tf_cs; 466 frame.sf_sc.sc_eflags = regs->tf_eflags; 467 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 468 frame.sf_sc.sc_ss = regs->tf_ss; 469 frame.sf_sc.sc_err = regs->tf_err; 470 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 471 472 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 473 frame.sf_extramask[i] = lmask.__bits[i+1]; 474 475 if (copyout(&frame, fp, sizeof(frame)) != 0) { 476 /* 477 * Process has trashed its stack; give it an illegal 478 * instruction to halt it in its tracks. 479 */ 480 PROC_LOCK(p); 481 sigexit(td, SIGILL); 482 } 483 484 /* 485 * Build context to run handler in. 486 */ 487 regs->tf_esp = (int)fp; 488 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 489 regs->tf_eflags &= ~(PSL_T | PSL_VM); 490 regs->tf_cs = _ucodesel; 491 regs->tf_ds = _udatasel; 492 regs->tf_es = _udatasel; 493 regs->tf_fs = _udatasel; 494 regs->tf_ss = _udatasel; 495 PROC_LOCK(p); 496} 497 498/* 499 * System call to cleanup state after a signal 500 * has been taken. Reset signal mask and 501 * stack state from context left by sendsig (above). 
502 * Return to previous pc and psl as specified by 503 * context left by sendsig. Check carefully to 504 * make sure that the user has not modified the 505 * psl to gain improper privileges or to cause 506 * a machine fault. 507 */ 508int 509linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 510{ 511 struct proc *p = td->td_proc; 512 struct l_sigframe frame; 513 struct trapframe *regs; 514 l_sigset_t lmask; 515 int eflags, i; 516 517 regs = td->td_frame; 518 519#ifdef DEBUG 520 if (ldebug(sigreturn)) 521 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 522#endif 523 /* 524 * The trampoline code hands us the sigframe. 525 * It is unsafe to keep track of it ourselves, in the event that a 526 * program jumps out of a signal handler. 527 */ 528 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 529 return (EFAULT); 530 531 /* 532 * Check for security violations. 533 */ 534#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 535 eflags = frame.sf_sc.sc_eflags; 536 /* 537 * XXX do allow users to change the privileged flag PSL_RF. The 538 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 539 * sometimes set it there too. tf_eflags is kept in the signal 540 * context during signal handling and there is no other place 541 * to remember it, so the PSL_RF bit may be corrupted by the 542 * signal handler without us knowing. Corruption of the PSL_RF 543 * bit at worst causes one more or one less debugger trap, so 544 * allowing it is fairly harmless. 545 */ 546 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 547 return(EINVAL); 548 549 /* 550 * Don't allow users to load a valid privileged %cs. Let the 551 * hardware check for invalid selectors, excess privilege in 552 * other selectors, invalid %eip's and invalid %esp's. 
553 */ 554#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 555 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 556 trapsignal(p, SIGBUS, T_PROTFLT); 557 return(EINVAL); 558 } 559 560 lmask.__bits[0] = frame.sf_sc.sc_mask; 561 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 562 lmask.__bits[i+1] = frame.sf_extramask[i]; 563 PROC_LOCK(p); 564 linux_to_bsd_sigset(&lmask, &p->p_sigmask); 565 SIG_CANTMASK(p->p_sigmask); 566 signotify(p); 567 PROC_UNLOCK(p); 568 569 /* 570 * Restore signal context. 571 */ 572 /* %gs was restored by the trampoline. */ 573 regs->tf_fs = frame.sf_sc.sc_fs; 574 regs->tf_es = frame.sf_sc.sc_es; 575 regs->tf_ds = frame.sf_sc.sc_ds; 576 regs->tf_edi = frame.sf_sc.sc_edi; 577 regs->tf_esi = frame.sf_sc.sc_esi; 578 regs->tf_ebp = frame.sf_sc.sc_ebp; 579 regs->tf_ebx = frame.sf_sc.sc_ebx; 580 regs->tf_edx = frame.sf_sc.sc_edx; 581 regs->tf_ecx = frame.sf_sc.sc_ecx; 582 regs->tf_eax = frame.sf_sc.sc_eax; 583 regs->tf_eip = frame.sf_sc.sc_eip; 584 regs->tf_cs = frame.sf_sc.sc_cs; 585 regs->tf_eflags = eflags; 586 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 587 regs->tf_ss = frame.sf_sc.sc_ss; 588 589 return (EJUSTRETURN); 590} 591 592/* 593 * System call to cleanup state after a signal 594 * has been taken. Reset signal mask and 595 * stack state from context left by rt_sendsig (above). 596 * Return to previous pc and psl as specified by 597 * context left by sendsig. Check carefully to 598 * make sure that the user has not modified the 599 * psl to gain improper privileges or to cause 600 * a machine fault. 
601 */ 602int 603linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 604{ 605 struct proc *p = td->td_proc; 606 struct l_ucontext uc; 607 struct l_sigcontext *context; 608 l_stack_t *lss; 609 stack_t ss; 610 struct trapframe *regs; 611 int eflags; 612 613 regs = td->td_frame; 614 615#ifdef DEBUG 616 if (ldebug(rt_sigreturn)) 617 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 618#endif 619 /* 620 * The trampoline code hands us the ucontext. 621 * It is unsafe to keep track of it ourselves, in the event that a 622 * program jumps out of a signal handler. 623 */ 624 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 625 return (EFAULT); 626 627 context = &uc.uc_mcontext; 628 629 /* 630 * Check for security violations. 631 */ 632#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 633 eflags = context->sc_eflags; 634 /* 635 * XXX do allow users to change the privileged flag PSL_RF. The 636 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 637 * sometimes set it there too. tf_eflags is kept in the signal 638 * context during signal handling and there is no other place 639 * to remember it, so the PSL_RF bit may be corrupted by the 640 * signal handler without us knowing. Corruption of the PSL_RF 641 * bit at worst causes one more or one less debugger trap, so 642 * allowing it is fairly harmless. 643 */ 644 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 645 return(EINVAL); 646 647 /* 648 * Don't allow users to load a valid privileged %cs. Let the 649 * hardware check for invalid selectors, excess privilege in 650 * other selectors, invalid %eip's and invalid %esp's. 
651 */ 652#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 653 if (!CS_SECURE(context->sc_cs)) { 654 trapsignal(p, SIGBUS, T_PROTFLT); 655 return(EINVAL); 656 } 657 658 PROC_LOCK(p); 659 linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask); 660 SIG_CANTMASK(p->p_sigmask); 661 signotify(p); 662 PROC_UNLOCK(p); 663 664 /* 665 * Restore signal context 666 */ 667 /* %gs was restored by the trampoline. */ 668 regs->tf_fs = context->sc_fs; 669 regs->tf_es = context->sc_es; 670 regs->tf_ds = context->sc_ds; 671 regs->tf_edi = context->sc_edi; 672 regs->tf_esi = context->sc_esi; 673 regs->tf_ebp = context->sc_ebp; 674 regs->tf_ebx = context->sc_ebx; 675 regs->tf_edx = context->sc_edx; 676 regs->tf_ecx = context->sc_ecx; 677 regs->tf_eax = context->sc_eax; 678 regs->tf_eip = context->sc_eip; 679 regs->tf_cs = context->sc_cs; 680 regs->tf_eflags = eflags; 681 regs->tf_esp = context->sc_esp_at_signal; 682 regs->tf_ss = context->sc_ss; 683 684 /* 685 * call sigaltstack & ignore results.. 686 */ 687 lss = &uc.uc_stack; 688 ss.ss_sp = lss->ss_sp; 689 ss.ss_size = lss->ss_size; 690 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 691 692#ifdef DEBUG 693 if (ldebug(rt_sigreturn)) 694 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 695 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 696#endif 697 (void)kern_sigaltstack(td, &ss, NULL); 698 699 return (EJUSTRETURN); 700} 701 702/* 703 * MPSAFE 704 */ 705static void 706linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 707{ 708 args[0] = tf->tf_ebx; 709 args[1] = tf->tf_ecx; 710 args[2] = tf->tf_edx; 711 args[3] = tf->tf_esi; 712 args[4] = tf->tf_edi; 713 args[5] = tf->tf_ebp; /* Unconfirmed */ 714 *params = NULL; /* no copyin */ 715} 716 717 718 719/* 720 * Dump core, into a file named as described in the comments for 721 * expand_name(), unless the process was setuid/setgid. 
722 */ 723static int 724linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit) 725{ 726 struct proc *p = td->td_proc; 727 struct ucred *cred = td->td_ucred; 728 struct vmspace *vm = p->p_vmspace; 729 char *tempuser; 730 int error; 731 732 if (ctob((uarea_pages + kstack_pages) + 733 vm->vm_dsize + vm->vm_ssize) >= limit) 734 return (EFAULT); 735 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP, 736 M_WAITOK | M_ZERO); 737 if (tempuser == NULL) 738 return (ENOMEM); 739 PROC_LOCK(p); 740 fill_kinfo_proc(p, &p->p_uarea->u_kproc); 741 PROC_UNLOCK(p); 742 bcopy(p->p_uarea, tempuser, sizeof(struct user)); 743 bcopy(td->td_frame, 744 tempuser + ctob(uarea_pages) + 745 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack), 746 sizeof(struct trapframe)); 747 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser, 748 ctob(uarea_pages + kstack_pages), 749 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED, 750 (int *)NULL, td); 751 free(tempuser, M_TEMP); 752 if (error == 0) 753 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, 754 (int)ctob(vm->vm_dsize), 755 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE, 756 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 757 if (error == 0) 758 error = vn_rdwr_inchunks(UIO_WRITE, vp, 759 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)), 760 round_page(ctob(vm->vm_ssize)), 761 (off_t)ctob(uarea_pages + kstack_pages) + 762 ctob(vm->vm_dsize), UIO_USERSPACE, 763 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 764 return (error); 765} 766/* 767 * If a linux binary is exec'ing something, try this image activator 768 * first. We override standard shell script execution in order to 769 * be able to modify the interpreter path. We only do this if a linux 770 * binary is doing the exec, so we do not create an EXEC module for it. 
771 */ 772static int exec_linux_imgact_try(struct image_params *iparams); 773 774static int 775exec_linux_imgact_try(struct image_params *imgp) 776{ 777 const char *head = (const char *)imgp->image_header; 778 int error = -1; 779 780 /* 781 * The interpreter for shell scripts run from a linux binary needs 782 * to be located in /compat/linux if possible in order to recursively 783 * maintain linux path emulation. 784 */ 785 if (((const short *)head)[0] == SHELLMAGIC) { 786 /* 787 * Run our normal shell image activator. If it succeeds attempt 788 * to use the alternate path for the interpreter. If an alternate 789 * path is found, use our stringspace to store it. 790 */ 791 if ((error = exec_shell_imgact(imgp)) == 0) { 792 char *rpath = NULL; 793 794 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 795 imgp->interpreter_name, &rpath, 0); 796 if (rpath != imgp->interpreter_name) { 797 int len = strlen(rpath) + 1; 798 799 if (len <= MAXSHELLCMDLEN) { 800 memcpy(imgp->interpreter_name, rpath, len); 801 } 802 free(rpath, M_TEMP); 803 } 804 } 805 } 806 return(error); 807} 808 809struct sysentvec linux_sysvec = { 810 LINUX_SYS_MAXSYSCALL, 811 linux_sysent, 812 0xff, 813 LINUX_SIGTBLSZ, 814 bsd_to_linux_signal, 815 ELAST + 1, 816 bsd_to_linux_errno, 817 translate_traps, 818 linux_fixup, 819 linux_sendsig, 820 linux_sigcode, 821 &linux_szsigcode, 822 linux_prepsyscall, 823 "Linux a.out", 824 linux_aout_coredump, 825 exec_linux_imgact_try, 826 LINUX_MINSIGSTKSZ, 827 PAGE_SIZE, 828 VM_MIN_ADDRESS, 829 VM_MAXUSER_ADDRESS, 830 USRSTACK, 831 PS_STRINGS, 832 VM_PROT_ALL, 833 exec_copyout_strings, 834 exec_setregs 835}; 836 837struct sysentvec elf_linux_sysvec = { 838 LINUX_SYS_MAXSYSCALL, 839 linux_sysent, 840 0xff, 841 LINUX_SIGTBLSZ, 842 bsd_to_linux_signal, 843 ELAST + 1, 844 bsd_to_linux_errno, 845 translate_traps, 846 elf_linux_fixup, 847 linux_sendsig, 848 linux_sigcode, 849 &linux_szsigcode, 850 linux_prepsyscall, 851 "Linux ELF", 852 elf32_coredump, 853 
exec_linux_imgact_try, 854 LINUX_MINSIGSTKSZ, 855 PAGE_SIZE, 856 VM_MIN_ADDRESS, 857 VM_MAXUSER_ADDRESS, 858 USRSTACK, 859 PS_STRINGS, 860 VM_PROT_ALL, 861 exec_copyout_strings, 862 exec_setregs 863}; 864 865static Elf32_Brandinfo linux_brand = { 866 ELFOSABI_LINUX, 867 EM_386, 868 "Linux", 869 "/compat/linux", 870 "/lib/ld-linux.so.1", 871 &elf_linux_sysvec 872 }; 873 874static Elf32_Brandinfo linux_glibc2brand = { 875 ELFOSABI_LINUX, 876 EM_386, 877 "Linux", 878 "/compat/linux", 879 "/lib/ld-linux.so.2", 880 &elf_linux_sysvec 881 }; 882 883Elf32_Brandinfo *linux_brandlist[] = { 884 &linux_brand, 885 &linux_glibc2brand, 886 NULL 887 }; 888 889static int 890linux_elf_modevent(module_t mod, int type, void *data) 891{ 892 Elf32_Brandinfo **brandinfo; 893 int error; 894 struct linux_ioctl_handler **lihp; 895 896 error = 0; 897 898 switch(type) { 899 case MOD_LOAD: 900 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 901 ++brandinfo) 902 if (elf32_insert_brand_entry(*brandinfo) < 0) 903 error = EINVAL; 904 if (error == 0) { 905 SET_FOREACH(lihp, linux_ioctl_handler_set) 906 linux_ioctl_register_handler(*lihp); 907 if (bootverbose) 908 printf("Linux ELF exec handler installed\n"); 909 } else 910 printf("cannot insert Linux ELF brand handler\n"); 911 break; 912 case MOD_UNLOAD: 913 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 914 ++brandinfo) 915 if (elf32_brand_inuse(*brandinfo)) 916 error = EBUSY; 917 if (error == 0) { 918 for (brandinfo = &linux_brandlist[0]; 919 *brandinfo != NULL; ++brandinfo) 920 if (elf32_remove_brand_entry(*brandinfo) < 0) 921 error = EINVAL; 922 } 923 if (error == 0) { 924 SET_FOREACH(lihp, linux_ioctl_handler_set) 925 linux_ioctl_unregister_handler(*lihp); 926 if (bootverbose) 927 printf("Linux ELF exec handler removed\n"); 928 linux_mib_destroy(); 929 } else 930 printf("Could not deinstall ELF interpreter entry\n"); 931 break; 932 default: 933 break; 934 } 935 return error; 936} 937 938static moduledata_t linux_elf_mod = 
{ 939 "linuxelf", 940 linux_elf_modevent, 941 0 942}; 943 944DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 945