/* linux_sysvec.c revision 103086 */
1/*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 103086 2002-09-07 22:31:44Z peter $ 29 */ 30 31/* XXX we use functions that might not exist. */ 32#include "opt_compat.h" 33 34#ifndef COMPAT_43 35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 
36#endif 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/imgact.h> 41#include <sys/imgact_aout.h> 42#include <sys/imgact_elf.h> 43#include <sys/lock.h> 44#include <sys/malloc.h> 45#include <sys/mutex.h> 46#include <sys/proc.h> 47#include <sys/signalvar.h> 48#include <sys/syscallsubr.h> 49#include <sys/sysent.h> 50#include <sys/sysproto.h> 51#include <sys/user.h> 52#include <sys/vnode.h> 53 54#include <vm/vm.h> 55#include <vm/vm_param.h> 56#include <vm/vm_page.h> 57#include <vm/vm_extern.h> 58#include <sys/exec.h> 59#include <sys/kernel.h> 60#include <sys/module.h> 61#include <machine/cpu.h> 62#include <machine/md_var.h> 63#include <sys/mutex.h> 64 65#include <vm/vm.h> 66#include <vm/vm_param.h> 67#include <vm/pmap.h> 68#include <vm/vm_map.h> 69#include <vm/vm_object.h> 70 71#include <i386/linux/linux.h> 72#include <i386/linux/linux_proto.h> 73#include <compat/linux/linux_signal.h> 74#include <compat/linux/linux_util.h> 75 76MODULE_VERSION(linux, 1); 77MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 78MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 79MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 80 81MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 82 83#if BYTE_ORDER == LITTLE_ENDIAN 84#define SHELLMAGIC 0x2123 /* #! */ 85#else 86#define SHELLMAGIC 0x2321 87#endif 88 89/* 90 * Allow the sendsig functions to use the ldebug() facility 91 * even though they are not syscalls themselves. Map them 92 * to syscall 0. This is slightly less bogus than using 93 * ldebug(sigreturn). 
94 */ 95#define LINUX_SYS_linux_rt_sendsig 0 96#define LINUX_SYS_linux_sendsig 0 97 98extern char linux_sigcode[]; 99extern int linux_szsigcode; 100 101extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 102 103SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 104 105static int linux_fixup(register_t **stack_base, 106 struct image_params *iparams); 107static int elf_linux_fixup(register_t **stack_base, 108 struct image_params *iparams); 109static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 110 caddr_t *params); 111static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 112 u_long code); 113 114/* 115 * Linux syscalls return negative errno's, we do positive and map them 116 */ 117static int bsd_to_linux_errno[ELAST + 1] = { 118 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 119 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 120 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 121 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 122 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 123 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 124 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 125 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 126 -6, -6, -43, -42, -75, -6, -84 127}; 128 129int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 130 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 131 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 132 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0, 133 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 134 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 135 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 136 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 137 0, LINUX_SIGUSR1, LINUX_SIGUSR2 138}; 139 140int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 141 SIGHUP, SIGINT, SIGQUIT, SIGILL, 142 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 143 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 144 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 145 SIGCHLD, SIGCONT, SIGSTOP, 
SIGTSTP, 146 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 147 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 148 SIGIO, SIGURG, 0 149}; 150 151#define LINUX_T_UNKNOWN 255 152static int _bsd_to_linux_trapcode[] = { 153 LINUX_T_UNKNOWN, /* 0 */ 154 6, /* 1 T_PRIVINFLT */ 155 LINUX_T_UNKNOWN, /* 2 */ 156 3, /* 3 T_BPTFLT */ 157 LINUX_T_UNKNOWN, /* 4 */ 158 LINUX_T_UNKNOWN, /* 5 */ 159 16, /* 6 T_ARITHTRAP */ 160 254, /* 7 T_ASTFLT */ 161 LINUX_T_UNKNOWN, /* 8 */ 162 13, /* 9 T_PROTFLT */ 163 1, /* 10 T_TRCTRAP */ 164 LINUX_T_UNKNOWN, /* 11 */ 165 14, /* 12 T_PAGEFLT */ 166 LINUX_T_UNKNOWN, /* 13 */ 167 17, /* 14 T_ALIGNFLT */ 168 LINUX_T_UNKNOWN, /* 15 */ 169 LINUX_T_UNKNOWN, /* 16 */ 170 LINUX_T_UNKNOWN, /* 17 */ 171 0, /* 18 T_DIVIDE */ 172 2, /* 19 T_NMI */ 173 4, /* 20 T_OFLOW */ 174 5, /* 21 T_BOUND */ 175 7, /* 22 T_DNA */ 176 8, /* 23 T_DOUBLEFLT */ 177 9, /* 24 T_FPOPFLT */ 178 10, /* 25 T_TSSFLT */ 179 11, /* 26 T_SEGNPFLT */ 180 12, /* 27 T_STKFLT */ 181 18, /* 28 T_MCHK */ 182 19, /* 29 T_XMMFLT */ 183 15 /* 30 T_RESERVED */ 184}; 185#define bsd_to_linux_trapcode(code) \ 186 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 187 _bsd_to_linux_trapcode[(code)]: \ 188 LINUX_T_UNKNOWN) 189 190/* 191 * If FreeBSD & Linux have a difference of opinion about what a trap 192 * means, deal with it here. 
193 * 194 * MPSAFE 195 */ 196static int 197translate_traps(int signal, int trap_code) 198{ 199 if (signal != SIGBUS) 200 return signal; 201 switch (trap_code) { 202 case T_PROTFLT: 203 case T_TSSFLT: 204 case T_DOUBLEFLT: 205 case T_PAGEFLT: 206 return SIGSEGV; 207 default: 208 return signal; 209 } 210} 211 212static int 213linux_fixup(register_t **stack_base, struct image_params *imgp) 214{ 215 register_t *argv, *envp; 216 217 argv = *stack_base; 218 envp = *stack_base + (imgp->argc + 1); 219 (*stack_base)--; 220 **stack_base = (intptr_t)(void *)envp; 221 (*stack_base)--; 222 **stack_base = (intptr_t)(void *)argv; 223 (*stack_base)--; 224 **stack_base = imgp->argc; 225 return 0; 226} 227 228static int 229elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 230{ 231 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs; 232 register_t *pos; 233 234 pos = *stack_base + (imgp->argc + imgp->envc + 2); 235 236 if (args->trace) { 237 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 238 } 239 if (args->execfd != -1) { 240 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 241 } 242 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 243 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 244 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 245 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 246 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 247 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 248 AUXARGS_ENTRY(pos, AT_BASE, args->base); 249 PROC_LOCK(imgp->proc); 250 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 251 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 252 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 253 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 254 PROC_UNLOCK(imgp->proc); 255 AUXARGS_ENTRY(pos, AT_NULL, 0); 256 257 free(imgp->auxargs, M_TEMP); 258 imgp->auxargs = NULL; 259 260 (*stack_base)--; 261 **stack_base = (long)imgp->argc; 262 return 0; 263} 264 265extern int _ucodesel, _udatasel; 266extern unsigned long linux_sznonrtsigcode; 267 268static 
void 269linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 270{ 271 register struct thread *td = curthread; 272 register struct proc *p = td->td_proc; 273 register struct trapframe *regs; 274 struct l_rt_sigframe *fp, frame; 275 int oonstack; 276 277 PROC_LOCK_ASSERT(p, MA_OWNED); 278 regs = td->td_frame; 279 oonstack = sigonstack(regs->tf_esp); 280 281#ifdef DEBUG 282 if (ldebug(rt_sendsig)) 283 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 284 catcher, sig, (void*)mask, code); 285#endif 286 /* 287 * Allocate space for the signal handler context. 288 */ 289 if ((p->p_flag & P_ALTSTACK) && !oonstack && 290 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) { 291 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp + 292 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 293 } else 294 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 295 PROC_UNLOCK(p); 296 297 /* 298 * Build the argument list for the signal handler. 299 */ 300 if (p->p_sysent->sv_sigtbl) 301 if (sig <= p->p_sysent->sv_sigsize) 302 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 303 304 frame.sf_handler = catcher; 305 frame.sf_sig = sig; 306 frame.sf_siginfo = &fp->sf_si; 307 frame.sf_ucontext = &fp->sf_sc; 308 309 /* Fill in POSIX parts */ 310 frame.sf_si.lsi_signo = sig; 311 frame.sf_si.lsi_code = code; 312 frame.sf_si.lsi_addr = (void *)regs->tf_err; 313 314 /* 315 * Build the signal context to be used by sigreturn. 316 */ 317 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 318 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 319 320 PROC_LOCK(p); 321 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp; 322 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size; 323 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) 324 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 325 PROC_UNLOCK(p); 326 327 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 328 329 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 330 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 331 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 332 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 333 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 334 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 335 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 336 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 337 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 338 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 339 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 340 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 341 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 342 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 343 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 344 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 345 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 346 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 347 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 348 349#ifdef DEBUG 350 if (ldebug(rt_sendsig)) 351 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 352 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp, 353 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 354#endif 355 356 if (copyout(&frame, fp, sizeof(frame)) != 0) { 357 /* 358 * Process has trashed its stack; give it an illegal 359 * instruction to halt it in its tracks. 360 */ 361#ifdef DEBUG 362 if (ldebug(rt_sendsig)) 363 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 364 fp, oonstack); 365#endif 366 PROC_LOCK(p); 367 sigexit(td, SIGILL); 368 } 369 370 /* 371 * Build context to run handler in. 
372 */ 373 regs->tf_esp = (int)fp; 374 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 375 linux_sznonrtsigcode; 376 regs->tf_eflags &= ~(PSL_T | PSL_VM); 377 regs->tf_cs = _ucodesel; 378 regs->tf_ds = _udatasel; 379 regs->tf_es = _udatasel; 380 regs->tf_fs = _udatasel; 381 regs->tf_ss = _udatasel; 382 PROC_LOCK(p); 383} 384 385 386/* 387 * Send an interrupt to process. 388 * 389 * Stack is set up to allow sigcode stored 390 * in u. to call routine, followed by kcall 391 * to sigreturn routine below. After sigreturn 392 * resets the signal mask, the stack, and the 393 * frame pointer, it returns to the user 394 * specified pc, psl. 395 */ 396 397static void 398linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 399{ 400 register struct thread *td = curthread; 401 register struct proc *p = td->td_proc; 402 register struct trapframe *regs; 403 struct l_sigframe *fp, frame; 404 l_sigset_t lmask; 405 int oonstack, i; 406 407 PROC_LOCK_ASSERT(p, MA_OWNED); 408 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { 409 /* Signal handler installed with SA_SIGINFO. */ 410 linux_rt_sendsig(catcher, sig, mask, code); 411 return; 412 } 413 414 regs = td->td_frame; 415 oonstack = sigonstack(regs->tf_esp); 416 417#ifdef DEBUG 418 if (ldebug(sendsig)) 419 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 420 catcher, sig, (void*)mask, code); 421#endif 422 423 /* 424 * Allocate space for the signal handler context. 425 */ 426 if ((p->p_flag & P_ALTSTACK) && !oonstack && 427 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) { 428 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp + 429 p->p_sigstk.ss_size - sizeof(struct l_sigframe)); 430 } else 431 fp = (struct l_sigframe *)regs->tf_esp - 1; 432 PROC_UNLOCK(p); 433 434 /* 435 * Build the argument list for the signal handler. 
436 */ 437 if (p->p_sysent->sv_sigtbl) 438 if (sig <= p->p_sysent->sv_sigsize) 439 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 440 441 frame.sf_handler = catcher; 442 frame.sf_sig = sig; 443 444 bsd_to_linux_sigset(mask, &lmask); 445 446 /* 447 * Build the signal context to be used by sigreturn. 448 */ 449 frame.sf_sc.sc_mask = lmask.__bits[0]; 450 frame.sf_sc.sc_gs = rgs(); 451 frame.sf_sc.sc_fs = regs->tf_fs; 452 frame.sf_sc.sc_es = regs->tf_es; 453 frame.sf_sc.sc_ds = regs->tf_ds; 454 frame.sf_sc.sc_edi = regs->tf_edi; 455 frame.sf_sc.sc_esi = regs->tf_esi; 456 frame.sf_sc.sc_ebp = regs->tf_ebp; 457 frame.sf_sc.sc_ebx = regs->tf_ebx; 458 frame.sf_sc.sc_edx = regs->tf_edx; 459 frame.sf_sc.sc_ecx = regs->tf_ecx; 460 frame.sf_sc.sc_eax = regs->tf_eax; 461 frame.sf_sc.sc_eip = regs->tf_eip; 462 frame.sf_sc.sc_cs = regs->tf_cs; 463 frame.sf_sc.sc_eflags = regs->tf_eflags; 464 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 465 frame.sf_sc.sc_ss = regs->tf_ss; 466 frame.sf_sc.sc_err = regs->tf_err; 467 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 468 469 bzero(&frame.sf_fpstate, sizeof(struct l_fpstate)); 470 471 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 472 frame.sf_extramask[i] = lmask.__bits[i+1]; 473 474 if (copyout(&frame, fp, sizeof(frame)) != 0) { 475 /* 476 * Process has trashed its stack; give it an illegal 477 * instruction to halt it in its tracks. 478 */ 479 PROC_LOCK(p); 480 sigexit(td, SIGILL); 481 } 482 483 /* 484 * Build context to run handler in. 485 */ 486 regs->tf_esp = (int)fp; 487 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 488 regs->tf_eflags &= ~(PSL_T | PSL_VM); 489 regs->tf_cs = _ucodesel; 490 regs->tf_ds = _udatasel; 491 regs->tf_es = _udatasel; 492 regs->tf_fs = _udatasel; 493 regs->tf_ss = _udatasel; 494 PROC_LOCK(p); 495} 496 497/* 498 * System call to cleanup state after a signal 499 * has been taken. Reset signal mask and 500 * stack state from context left by sendsig (above). 
501 * Return to previous pc and psl as specified by 502 * context left by sendsig. Check carefully to 503 * make sure that the user has not modified the 504 * psl to gain improper privileges or to cause 505 * a machine fault. 506 */ 507int 508linux_sigreturn(td, args) 509 struct thread *td; 510 struct linux_sigreturn_args *args; 511{ 512 struct proc *p = td->td_proc; 513 struct l_sigframe frame; 514 register struct trapframe *regs; 515 l_sigset_t lmask; 516 int eflags, i; 517 518 regs = td->td_frame; 519 520#ifdef DEBUG 521 if (ldebug(sigreturn)) 522 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 523#endif 524 /* 525 * The trampoline code hands us the sigframe. 526 * It is unsafe to keep track of it ourselves, in the event that a 527 * program jumps out of a signal handler. 528 */ 529 if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0) 530 return (EFAULT); 531 532 /* 533 * Check for security violations. 534 */ 535#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 536 eflags = frame.sf_sc.sc_eflags; 537 /* 538 * XXX do allow users to change the privileged flag PSL_RF. The 539 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 540 * sometimes set it there too. tf_eflags is kept in the signal 541 * context during signal handling and there is no other place 542 * to remember it, so the PSL_RF bit may be corrupted by the 543 * signal handler without us knowing. Corruption of the PSL_RF 544 * bit at worst causes one more or one less debugger trap, so 545 * allowing it is fairly harmless. 546 */ 547 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { 548 return(EINVAL); 549 } 550 551 /* 552 * Don't allow users to load a valid privileged %cs. Let the 553 * hardware check for invalid selectors, excess privilege in 554 * other selectors, invalid %eip's and invalid %esp's. 
555 */ 556#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 557 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 558 trapsignal(p, SIGBUS, T_PROTFLT); 559 return(EINVAL); 560 } 561 562 lmask.__bits[0] = frame.sf_sc.sc_mask; 563 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 564 lmask.__bits[i+1] = frame.sf_extramask[i]; 565 PROC_LOCK(p); 566 linux_to_bsd_sigset(&lmask, &p->p_sigmask); 567 SIG_CANTMASK(p->p_sigmask); 568 signotify(p); 569 PROC_UNLOCK(p); 570 571 /* 572 * Restore signal context. 573 */ 574 /* %gs was restored by the trampoline. */ 575 regs->tf_fs = frame.sf_sc.sc_fs; 576 regs->tf_es = frame.sf_sc.sc_es; 577 regs->tf_ds = frame.sf_sc.sc_ds; 578 regs->tf_edi = frame.sf_sc.sc_edi; 579 regs->tf_esi = frame.sf_sc.sc_esi; 580 regs->tf_ebp = frame.sf_sc.sc_ebp; 581 regs->tf_ebx = frame.sf_sc.sc_ebx; 582 regs->tf_edx = frame.sf_sc.sc_edx; 583 regs->tf_ecx = frame.sf_sc.sc_ecx; 584 regs->tf_eax = frame.sf_sc.sc_eax; 585 regs->tf_eip = frame.sf_sc.sc_eip; 586 regs->tf_cs = frame.sf_sc.sc_cs; 587 regs->tf_eflags = eflags; 588 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 589 regs->tf_ss = frame.sf_sc.sc_ss; 590 591 return (EJUSTRETURN); 592} 593 594/* 595 * System call to cleanup state after a signal 596 * has been taken. Reset signal mask and 597 * stack state from context left by rt_sendsig (above). 598 * Return to previous pc and psl as specified by 599 * context left by sendsig. Check carefully to 600 * make sure that the user has not modified the 601 * psl to gain improper privileges or to cause 602 * a machine fault. 
603 */ 604int 605linux_rt_sigreturn(td, args) 606 struct thread *td; 607 struct linux_rt_sigreturn_args *args; 608{ 609 struct proc *p = td->td_proc; 610 struct l_ucontext uc; 611 struct l_sigcontext *context; 612 l_stack_t *lss; 613 stack_t ss; 614 register struct trapframe *regs; 615 int eflags; 616 617 regs = td->td_frame; 618 619#ifdef DEBUG 620 if (ldebug(rt_sigreturn)) 621 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 622#endif 623 /* 624 * The trampoline code hands us the ucontext. 625 * It is unsafe to keep track of it ourselves, in the event that a 626 * program jumps out of a signal handler. 627 */ 628 if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0) 629 return (EFAULT); 630 631 context = &uc.uc_mcontext; 632 633 /* 634 * Check for security violations. 635 */ 636#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 637 eflags = context->sc_eflags; 638 /* 639 * XXX do allow users to change the privileged flag PSL_RF. The 640 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 641 * sometimes set it there too. tf_eflags is kept in the signal 642 * context during signal handling and there is no other place 643 * to remember it, so the PSL_RF bit may be corrupted by the 644 * signal handler without us knowing. Corruption of the PSL_RF 645 * bit at worst causes one more or one less debugger trap, so 646 * allowing it is fairly harmless. 647 */ 648 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { 649 return(EINVAL); 650 } 651 652 /* 653 * Don't allow users to load a valid privileged %cs. Let the 654 * hardware check for invalid selectors, excess privilege in 655 * other selectors, invalid %eip's and invalid %esp's. 
656 */ 657#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 658 if (!CS_SECURE(context->sc_cs)) { 659 trapsignal(p, SIGBUS, T_PROTFLT); 660 return(EINVAL); 661 } 662 663 PROC_LOCK(p); 664 linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask); 665 SIG_CANTMASK(p->p_sigmask); 666 signotify(p); 667 PROC_UNLOCK(p); 668 669 /* 670 * Restore signal context 671 */ 672 /* %gs was restored by the trampoline. */ 673 regs->tf_fs = context->sc_fs; 674 regs->tf_es = context->sc_es; 675 regs->tf_ds = context->sc_ds; 676 regs->tf_edi = context->sc_edi; 677 regs->tf_esi = context->sc_esi; 678 regs->tf_ebp = context->sc_ebp; 679 regs->tf_ebx = context->sc_ebx; 680 regs->tf_edx = context->sc_edx; 681 regs->tf_ecx = context->sc_ecx; 682 regs->tf_eax = context->sc_eax; 683 regs->tf_eip = context->sc_eip; 684 regs->tf_cs = context->sc_cs; 685 regs->tf_eflags = eflags; 686 regs->tf_esp = context->sc_esp_at_signal; 687 regs->tf_ss = context->sc_ss; 688 689 /* 690 * call sigaltstack & ignore results.. 691 */ 692 lss = &uc.uc_stack; 693 ss.ss_sp = lss->ss_sp; 694 ss.ss_size = lss->ss_size; 695 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 696 697#ifdef DEBUG 698 if (ldebug(rt_sigreturn)) 699 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 700 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 701#endif 702 (void)kern_sigaltstack(td, &ss, NULL); 703 704 return (EJUSTRETURN); 705} 706 707/* 708 * MPSAFE 709 */ 710static void 711linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 712{ 713 args[0] = tf->tf_ebx; 714 args[1] = tf->tf_ecx; 715 args[2] = tf->tf_edx; 716 args[3] = tf->tf_esi; 717 args[4] = tf->tf_edi; 718 args[5] = tf->tf_ebp; /* Unconfirmed */ 719 *params = NULL; /* no copyin */ 720} 721 722 723 724/* 725 * Dump core, into a file named as described in the comments for 726 * expand_name(), unless the process was setuid/setgid. 
727 */ 728static int 729linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit) 730{ 731 struct proc *p = td->td_proc; 732 struct ucred *cred = td->td_ucred; 733 struct vmspace *vm = p->p_vmspace; 734 char *tempuser; 735 int error; 736 737 if (ctob((uarea_pages + kstack_pages) + 738 vm->vm_dsize + vm->vm_ssize) >= limit) 739 return (EFAULT); 740 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP, 741 M_WAITOK | M_ZERO); 742 if (tempuser == NULL) 743 return (ENOMEM); 744 PROC_LOCK(p); 745 fill_kinfo_proc(p, &p->p_uarea->u_kproc); 746 PROC_UNLOCK(p); 747 bcopy(p->p_uarea, tempuser, sizeof(struct user)); 748 bcopy(td->td_frame, 749 tempuser + ctob(uarea_pages) + 750 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack), 751 sizeof(struct trapframe)); 752 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser, 753 ctob(uarea_pages + kstack_pages), 754 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED, 755 (int *)NULL, td); 756 free(tempuser, M_TEMP); 757 if (error == 0) 758 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, 759 (int)ctob(vm->vm_dsize), 760 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE, 761 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 762 if (error == 0) 763 error = vn_rdwr_inchunks(UIO_WRITE, vp, 764 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)), 765 round_page(ctob(vm->vm_ssize)), 766 (off_t)ctob(uarea_pages + kstack_pages) + 767 ctob(vm->vm_dsize), UIO_USERSPACE, 768 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 769 return (error); 770} 771/* 772 * If a linux binary is exec'ing something, try this image activator 773 * first. We override standard shell script execution in order to 774 * be able to modify the interpreter path. We only do this if a linux 775 * binary is doing the exec, so we do not create an EXEC module for it. 
776 */ 777static int exec_linux_imgact_try(struct image_params *iparams); 778 779static int 780exec_linux_imgact_try(imgp) 781 struct image_params *imgp; 782{ 783 const char *head = (const char *)imgp->image_header; 784 int error = -1; 785 786 /* 787 * The interpreter for shell scripts run from a linux binary needs 788 * to be located in /compat/linux if possible in order to recursively 789 * maintain linux path emulation. 790 */ 791 if (((const short *)head)[0] == SHELLMAGIC) { 792 /* 793 * Run our normal shell image activator. If it succeeds attempt 794 * to use the alternate path for the interpreter. If an alternate 795 * path is found, use our stringspace to store it. 796 */ 797 if ((error = exec_shell_imgact(imgp)) == 0) { 798 char *rpath = NULL; 799 800 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 801 imgp->interpreter_name, &rpath, 0); 802 if (rpath != imgp->interpreter_name) { 803 int len = strlen(rpath) + 1; 804 805 if (len <= MAXSHELLCMDLEN) { 806 memcpy(imgp->interpreter_name, rpath, len); 807 } 808 free(rpath, M_TEMP); 809 } 810 } 811 } 812 return(error); 813} 814 815struct sysentvec linux_sysvec = { 816 LINUX_SYS_MAXSYSCALL, 817 linux_sysent, 818 0xff, 819 LINUX_SIGTBLSZ, 820 bsd_to_linux_signal, 821 ELAST + 1, 822 bsd_to_linux_errno, 823 translate_traps, 824 linux_fixup, 825 linux_sendsig, 826 linux_sigcode, 827 &linux_szsigcode, 828 linux_prepsyscall, 829 "Linux a.out", 830 linux_aout_coredump, 831 exec_linux_imgact_try, 832 LINUX_MINSIGSTKSZ, 833 PAGE_SIZE, 834 VM_MIN_ADDRESS, 835 VM_MAXUSER_ADDRESS, 836 USRSTACK, 837 PS_STRINGS, 838 VM_PROT_ALL, 839 exec_copyout_strings, 840 exec_setregs 841}; 842 843struct sysentvec elf_linux_sysvec = { 844 LINUX_SYS_MAXSYSCALL, 845 linux_sysent, 846 0xff, 847 LINUX_SIGTBLSZ, 848 bsd_to_linux_signal, 849 ELAST + 1, 850 bsd_to_linux_errno, 851 translate_traps, 852 elf_linux_fixup, 853 linux_sendsig, 854 linux_sigcode, 855 &linux_szsigcode, 856 linux_prepsyscall, 857 "Linux ELF", 858 elf32_coredump, 859 
exec_linux_imgact_try, 860 LINUX_MINSIGSTKSZ, 861 PAGE_SIZE, 862 VM_MIN_ADDRESS, 863 VM_MAXUSER_ADDRESS, 864 USRSTACK, 865 PS_STRINGS, 866 VM_PROT_ALL, 867 exec_copyout_strings, 868 exec_setregs 869}; 870 871static Elf32_Brandinfo linux_brand = { 872 ELFOSABI_LINUX, 873 EM_386, 874 "Linux", 875 "/compat/linux", 876 "/lib/ld-linux.so.1", 877 &elf_linux_sysvec 878 }; 879 880static Elf32_Brandinfo linux_glibc2brand = { 881 ELFOSABI_LINUX, 882 EM_386, 883 "Linux", 884 "/compat/linux", 885 "/lib/ld-linux.so.2", 886 &elf_linux_sysvec 887 }; 888 889Elf32_Brandinfo *linux_brandlist[] = { 890 &linux_brand, 891 &linux_glibc2brand, 892 NULL 893 }; 894 895static int 896linux_elf_modevent(module_t mod, int type, void *data) 897{ 898 Elf32_Brandinfo **brandinfo; 899 int error; 900 struct linux_ioctl_handler **lihp; 901 902 error = 0; 903 904 switch(type) { 905 case MOD_LOAD: 906 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 907 ++brandinfo) 908 if (elf32_insert_brand_entry(*brandinfo) < 0) 909 error = EINVAL; 910 if (error == 0) { 911 SET_FOREACH(lihp, linux_ioctl_handler_set) 912 linux_ioctl_register_handler(*lihp); 913 if (bootverbose) 914 printf("Linux ELF exec handler installed\n"); 915 } else 916 printf("cannot insert Linux ELF brand handler\n"); 917 break; 918 case MOD_UNLOAD: 919 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 920 ++brandinfo) 921 if (elf32_brand_inuse(*brandinfo)) 922 error = EBUSY; 923 if (error == 0) { 924 for (brandinfo = &linux_brandlist[0]; 925 *brandinfo != NULL; ++brandinfo) 926 if (elf32_remove_brand_entry(*brandinfo) < 0) 927 error = EINVAL; 928 } 929 if (error == 0) { 930 SET_FOREACH(lihp, linux_ioctl_handler_set) 931 linux_ioctl_unregister_handler(*lihp); 932 if (bootverbose) 933 printf("Linux ELF exec handler removed\n"); 934 } else 935 printf("Could not deinstall ELF interpreter entry\n"); 936 break; 937 default: 938 break; 939 } 940 return error; 941} 942 943static moduledata_t linux_elf_mod = { 944 "linuxelf", 945 
linux_elf_modevent, 946 0 947}; 948 949DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 950