linux32_sysvec.c revision 293540
/*-
 * Copyright (c) 2004 Tim J. Robbins
 * Copyright (c) 2003 Peter Wemm
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1998-1999 Andrew Gallatin
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2431921Sbrian * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 2531921Sbrian * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2650479Speter * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2731061Sbrian * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2831061Sbrian * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2964802Sbrian * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30202192Sed * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3136285Sbrian */ 3233603Sbrian 3331061Sbrian#include <sys/cdefs.h> 3431061Sbrian__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_sysvec.c 293540 2016-01-09 16:29:51Z dchagin $"); 3531061Sbrian#include "opt_compat.h" 3631061Sbrian 3731061Sbrian#ifndef COMPAT_FREEBSD32 3831061Sbrian#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 3936285Sbrian#endif 4032025Sbrian 4131061Sbrian#define __ELF_WORD_SIZE 32 4236450Sbrian 4331061Sbrian#include <sys/param.h> 44202192Sed#include <sys/systm.h> 45202192Sed#include <sys/exec.h> 4636285Sbrian#include <sys/fcntl.h> 4736285Sbrian#include <sys/imgact.h> 4836467Sbrian#include <sys/imgact_elf.h> 4953241Sbrian#include <sys/kernel.h> 5051525Sbrian#include <sys/lock.h> 5151525Sbrian#include <sys/malloc.h> 5253535Sbrian#include <sys/module.h> 5353535Sbrian#include <sys/mutex.h> 5453535Sbrian#include <sys/proc.h> 5564802Sbrian#include <sys/resourcevar.h> 5664802Sbrian#include <sys/signalvar.h> 5764802Sbrian#include <sys/sysctl.h> 5864802Sbrian#include <sys/syscallsubr.h> 5964802Sbrian#include <sys/sysent.h> 6064802Sbrian#include <sys/sysproto.h> 6164802Sbrian#include <sys/vnode.h> 6264802Sbrian#include <sys/eventhandler.h> 6364802Sbrian 6464802Sbrian#include <vm/vm.h> 6564802Sbrian#include <vm/pmap.h> 6664802Sbrian#include <vm/vm_extern.h> 67202192Sed#include <vm/vm_map.h> 68202192Sed#include <vm/vm_object.h> 
6964802Sbrian#include <vm/vm_page.h> 7064802Sbrian#include <vm/vm_param.h> 7164802Sbrian 7264802Sbrian#include <machine/cpu.h> 7364802Sbrian#include <machine/md_var.h> 74134885Smarcel#include <machine/pcb.h> 75134885Smarcel#include <machine/specialreg.h> 7664802Sbrian 7764802Sbrian#include <amd64/linux32/linux.h> 7864802Sbrian#include <amd64/linux32/linux32_proto.h> 7964802Sbrian#include <compat/linux/linux_emul.h> 8064802Sbrian#include <compat/linux/linux_futex.h> 8164802Sbrian#include <compat/linux/linux_ioctl.h> 82#include <compat/linux/linux_mib.h> 83#include <compat/linux/linux_misc.h> 84#include <compat/linux/linux_signal.h> 85#include <compat/linux/linux_util.h> 86#include <compat/linux/linux_vdso.h> 87 88MODULE_VERSION(linux, 1); 89 90#define AUXARGS_ENTRY_32(pos, id, val) \ 91 do { \ 92 suword32(pos++, id); \ 93 suword32(pos++, val); \ 94 } while (0) 95 96#if BYTE_ORDER == LITTLE_ENDIAN 97#define SHELLMAGIC 0x2123 /* #! */ 98#else 99#define SHELLMAGIC 0x2321 100#endif 101 102/* 103 * Allow the sendsig functions to use the ldebug() facility 104 * even though they are not syscalls themselves. Map them 105 * to syscall 0. This is slightly less bogus than using 106 * ldebug(sigreturn). 
107 */ 108#define LINUX_SYS_linux_rt_sendsig 0 109#define LINUX_SYS_linux_sendsig 0 110 111const char *linux_kplatform; 112static int linux_szsigcode; 113static vm_object_t linux_shared_page_obj; 114static char *linux_shared_page_mapping; 115extern char _binary_linux32_locore_o_start; 116extern char _binary_linux32_locore_o_end; 117 118extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 119 120SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 121 122static int elf_linux_fixup(register_t **stack_base, 123 struct image_params *iparams); 124static register_t *linux_copyout_strings(struct image_params *imgp); 125static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 126static void exec_linux_setregs(struct thread *td, 127 struct image_params *imgp, u_long stack); 128static void linux32_fixlimit(struct rlimit *rl, int which); 129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 130static void linux_vdso_install(void *param); 131static void linux_vdso_deinstall(void *param); 132 133/* 134 * Linux syscalls return negative errno's, we do positive and map them 135 * Reference: 136 * FreeBSD: src/sys/sys/errno.h 137 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 138 * linux-2.6.17.8/include/asm-generic/errno.h 139 */ 140static int bsd_to_linux_errno[ELAST + 1] = { 141 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 142 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 143 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 144 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 145 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 146 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 147 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 148 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 149 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 150 -72, -67, -71 151}; 152 153int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 154 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 155 LINUX_SIGTRAP, LINUX_SIGABRT, 0, 
LINUX_SIGFPE, 156 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 157 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 158 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 159 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 160 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 161 0, LINUX_SIGUSR1, LINUX_SIGUSR2 162}; 163 164int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 165 SIGHUP, SIGINT, SIGQUIT, SIGILL, 166 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 167 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 168 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 169 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 170 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 171 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 172 SIGIO, SIGURG, SIGSYS 173}; 174 175#define LINUX_T_UNKNOWN 255 176static int _bsd_to_linux_trapcode[] = { 177 LINUX_T_UNKNOWN, /* 0 */ 178 6, /* 1 T_PRIVINFLT */ 179 LINUX_T_UNKNOWN, /* 2 */ 180 3, /* 3 T_BPTFLT */ 181 LINUX_T_UNKNOWN, /* 4 */ 182 LINUX_T_UNKNOWN, /* 5 */ 183 16, /* 6 T_ARITHTRAP */ 184 254, /* 7 T_ASTFLT */ 185 LINUX_T_UNKNOWN, /* 8 */ 186 13, /* 9 T_PROTFLT */ 187 1, /* 10 T_TRCTRAP */ 188 LINUX_T_UNKNOWN, /* 11 */ 189 14, /* 12 T_PAGEFLT */ 190 LINUX_T_UNKNOWN, /* 13 */ 191 17, /* 14 T_ALIGNFLT */ 192 LINUX_T_UNKNOWN, /* 15 */ 193 LINUX_T_UNKNOWN, /* 16 */ 194 LINUX_T_UNKNOWN, /* 17 */ 195 0, /* 18 T_DIVIDE */ 196 2, /* 19 T_NMI */ 197 4, /* 20 T_OFLOW */ 198 5, /* 21 T_BOUND */ 199 7, /* 22 T_DNA */ 200 8, /* 23 T_DOUBLEFLT */ 201 9, /* 24 T_FPOPFLT */ 202 10, /* 25 T_TSSFLT */ 203 11, /* 26 T_SEGNPFLT */ 204 12, /* 27 T_STKFLT */ 205 18, /* 28 T_MCHK */ 206 19, /* 29 T_XMMFLT */ 207 15 /* 30 T_RESERVED */ 208}; 209#define bsd_to_linux_trapcode(code) \ 210 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? 
\ 211 _bsd_to_linux_trapcode[(code)]: \ 212 LINUX_T_UNKNOWN) 213 214struct linux32_ps_strings { 215 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 216 u_int ps_nargvstr; /* the number of argument strings */ 217 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 218 u_int ps_nenvstr; /* the number of environment strings */ 219}; 220 221LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 222LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 223LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 224LINUX_VDSO_SYM_CHAR(linux_platform); 225 226/* 227 * If FreeBSD & Linux have a difference of opinion about what a trap 228 * means, deal with it here. 229 * 230 * MPSAFE 231 */ 232static int 233translate_traps(int signal, int trap_code) 234{ 235 if (signal != SIGBUS) 236 return signal; 237 switch (trap_code) { 238 case T_PROTFLT: 239 case T_TSSFLT: 240 case T_DOUBLEFLT: 241 case T_PAGEFLT: 242 return SIGSEGV; 243 default: 244 return signal; 245 } 246} 247 248static int 249elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 250{ 251 Elf32_Auxargs *args; 252 Elf32_Addr *base; 253 Elf32_Addr *pos; 254 struct linux32_ps_strings *arginfo; 255 256 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 257 258 KASSERT(curthread->td_proc == imgp->proc, 259 ("unsafe elf_linux_fixup(), should be curproc")); 260 base = (Elf32_Addr *)*stack_base; 261 args = (Elf32_Auxargs *)imgp->auxargs; 262 pos = base + (imgp->args->argc + imgp->args->envc + 2); 263 264 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 265 imgp->proc->p_sysent->sv_shared_page_base); 266 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 267 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 268 269 /* 270 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 271 * as it has appeared in the 2.4.0-rc7 first time. 272 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 273 * glibc falls back to the hard-coded CLK_TCK value when aux entry 274 * is not present. 
275 * Also see linux_times() implementation. 276 */ 277 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 278 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 279 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 280 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 281 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 282 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 283 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 284 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 285 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 286 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 287 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 288 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 289 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 290 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 291 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 292 AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary)); 293 if (imgp->execpathp != 0) 294 AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp)); 295 if (args->execfd != -1) 296 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 297 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 298 299 free(imgp->auxargs, M_TEMP); 300 imgp->auxargs = NULL; 301 302 base--; 303 suword32(base, (uint32_t)imgp->args->argc); 304 *stack_base = (register_t *)base; 305 return (0); 306} 307 308static void 309linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 310{ 311 struct thread *td = curthread; 312 struct proc *p = td->td_proc; 313 struct sigacts *psp; 314 struct trapframe *regs; 315 struct l_rt_sigframe *fp, frame; 316 int oonstack; 317 int sig; 318 int code; 319 320 sig = ksi->ksi_signo; 321 code = ksi->ksi_code; 322 PROC_LOCK_ASSERT(p, MA_OWNED); 323 psp = p->p_sigacts; 324 mtx_assert(&psp->ps_mtx, MA_OWNED); 325 regs = td->td_frame; 326 oonstack = sigonstack(regs->tf_rsp); 327 328#ifdef DEBUG 329 if (ldebug(rt_sendsig)) 330 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 331 catcher, sig, (void*)mask, 
code); 332#endif 333 /* 334 * Allocate space for the signal handler context. 335 */ 336 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 337 SIGISMEMBER(psp->ps_sigonstack, sig)) { 338 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 339 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 340 } else 341 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 342 mtx_unlock(&psp->ps_mtx); 343 344 /* 345 * Build the argument list for the signal handler. 346 */ 347 sig = BSD_TO_LINUX_SIGNAL(sig); 348 349 bzero(&frame, sizeof(frame)); 350 351 frame.sf_handler = PTROUT(catcher); 352 frame.sf_sig = sig; 353 frame.sf_siginfo = PTROUT(&fp->sf_si); 354 frame.sf_ucontext = PTROUT(&fp->sf_sc); 355 356 /* Fill in POSIX parts */ 357 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 358 359 /* 360 * Build the signal context to be used by sigreturn 361 * and libgcc unwind. 362 */ 363 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 364 frame.sf_sc.uc_link = 0; /* XXX ??? */ 365 366 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 367 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 368 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 369 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 370 PROC_UNLOCK(p); 371 372 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 373 374 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 375 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 376 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 377 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 378 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 379 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 380 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 381 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 382 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 383 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 384 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 385 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 386 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 387 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 388 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 389 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 390 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 391 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 392 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 393 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 394 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 395 396#ifdef DEBUG 397 if (ldebug(rt_sendsig)) 398 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 399 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 400 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 401#endif 402 403 if (copyout(&frame, fp, sizeof(frame)) != 0) { 404 /* 405 * Process has trashed its stack; give it an illegal 406 * instruction to halt it in its tracks. 407 */ 408#ifdef DEBUG 409 if (ldebug(rt_sendsig)) 410 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 411 fp, oonstack); 412#endif 413 PROC_LOCK(p); 414 sigexit(td, SIGILL); 415 } 416 417 /* 418 * Build context to run handler in. 
419 */ 420 regs->tf_rsp = PTROUT(fp); 421 regs->tf_rip = linux32_rt_sigcode; 422 regs->tf_rflags &= ~(PSL_T | PSL_D); 423 regs->tf_cs = _ucode32sel; 424 regs->tf_ss = _udatasel; 425 regs->tf_ds = _udatasel; 426 regs->tf_es = _udatasel; 427 regs->tf_fs = _ufssel; 428 regs->tf_gs = _ugssel; 429 regs->tf_flags = TF_HASSEGS; 430 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 431 PROC_LOCK(p); 432 mtx_lock(&psp->ps_mtx); 433} 434 435 436/* 437 * Send an interrupt to process. 438 * 439 * Stack is set up to allow sigcode stored 440 * in u. to call routine, followed by kcall 441 * to sigreturn routine below. After sigreturn 442 * resets the signal mask, the stack, and the 443 * frame pointer, it returns to the user 444 * specified pc, psl. 445 */ 446static void 447linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 448{ 449 struct thread *td = curthread; 450 struct proc *p = td->td_proc; 451 struct sigacts *psp; 452 struct trapframe *regs; 453 struct l_sigframe *fp, frame; 454 l_sigset_t lmask; 455 int oonstack, i; 456 int sig, code; 457 458 sig = ksi->ksi_signo; 459 code = ksi->ksi_code; 460 PROC_LOCK_ASSERT(p, MA_OWNED); 461 psp = p->p_sigacts; 462 mtx_assert(&psp->ps_mtx, MA_OWNED); 463 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 464 /* Signal handler installed with SA_SIGINFO. */ 465 linux_rt_sendsig(catcher, ksi, mask); 466 return; 467 } 468 469 regs = td->td_frame; 470 oonstack = sigonstack(regs->tf_rsp); 471 472#ifdef DEBUG 473 if (ldebug(sendsig)) 474 printf(ARGS(sendsig, "%p, %d, %p, %u"), 475 catcher, sig, (void*)mask, code); 476#endif 477 478 /* 479 * Allocate space for the signal handler context. 
480 */ 481 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 482 SIGISMEMBER(psp->ps_sigonstack, sig)) { 483 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 484 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 485 } else 486 fp = (struct l_sigframe *)regs->tf_rsp - 1; 487 mtx_unlock(&psp->ps_mtx); 488 PROC_UNLOCK(p); 489 490 /* 491 * Build the argument list for the signal handler. 492 */ 493 sig = BSD_TO_LINUX_SIGNAL(sig); 494 495 bzero(&frame, sizeof(frame)); 496 497 frame.sf_handler = PTROUT(catcher); 498 frame.sf_sig = sig; 499 500 bsd_to_linux_sigset(mask, &lmask); 501 502 /* 503 * Build the signal context to be used by sigreturn. 504 */ 505 frame.sf_sc.sc_mask = lmask.__bits[0]; 506 frame.sf_sc.sc_gs = regs->tf_gs; 507 frame.sf_sc.sc_fs = regs->tf_fs; 508 frame.sf_sc.sc_es = regs->tf_es; 509 frame.sf_sc.sc_ds = regs->tf_ds; 510 frame.sf_sc.sc_edi = regs->tf_rdi; 511 frame.sf_sc.sc_esi = regs->tf_rsi; 512 frame.sf_sc.sc_ebp = regs->tf_rbp; 513 frame.sf_sc.sc_ebx = regs->tf_rbx; 514 frame.sf_sc.sc_esp = regs->tf_rsp; 515 frame.sf_sc.sc_edx = regs->tf_rdx; 516 frame.sf_sc.sc_ecx = regs->tf_rcx; 517 frame.sf_sc.sc_eax = regs->tf_rax; 518 frame.sf_sc.sc_eip = regs->tf_rip; 519 frame.sf_sc.sc_cs = regs->tf_cs; 520 frame.sf_sc.sc_eflags = regs->tf_rflags; 521 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 522 frame.sf_sc.sc_ss = regs->tf_ss; 523 frame.sf_sc.sc_err = regs->tf_err; 524 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 525 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 526 527 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 528 frame.sf_extramask[i] = lmask.__bits[i+1]; 529 530 if (copyout(&frame, fp, sizeof(frame)) != 0) { 531 /* 532 * Process has trashed its stack; give it an illegal 533 * instruction to halt it in its tracks. 534 */ 535 PROC_LOCK(p); 536 sigexit(td, SIGILL); 537 } 538 539 /* 540 * Build context to run handler in. 
541 */ 542 regs->tf_rsp = PTROUT(fp); 543 regs->tf_rip = linux32_sigcode; 544 regs->tf_rflags &= ~(PSL_T | PSL_D); 545 regs->tf_cs = _ucode32sel; 546 regs->tf_ss = _udatasel; 547 regs->tf_ds = _udatasel; 548 regs->tf_es = _udatasel; 549 regs->tf_fs = _ufssel; 550 regs->tf_gs = _ugssel; 551 regs->tf_flags = TF_HASSEGS; 552 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 553 PROC_LOCK(p); 554 mtx_lock(&psp->ps_mtx); 555} 556 557/* 558 * System call to cleanup state after a signal 559 * has been taken. Reset signal mask and 560 * stack state from context left by sendsig (above). 561 * Return to previous pc and psl as specified by 562 * context left by sendsig. Check carefully to 563 * make sure that the user has not modified the 564 * psl to gain improper privileges or to cause 565 * a machine fault. 566 */ 567int 568linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 569{ 570 struct l_sigframe frame; 571 struct trapframe *regs; 572 sigset_t bmask; 573 l_sigset_t lmask; 574 int eflags, i; 575 ksiginfo_t ksi; 576 577 regs = td->td_frame; 578 579#ifdef DEBUG 580 if (ldebug(sigreturn)) 581 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 582#endif 583 /* 584 * The trampoline code hands us the sigframe. 585 * It is unsafe to keep track of it ourselves, in the event that a 586 * program jumps out of a signal handler. 587 */ 588 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 589 return (EFAULT); 590 591 /* 592 * Check for security violations. 593 */ 594#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 595 eflags = frame.sf_sc.sc_eflags; 596 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 597 return(EINVAL); 598 599 /* 600 * Don't allow users to load a valid privileged %cs. Let the 601 * hardware check for invalid selectors, excess privilege in 602 * other selectors, invalid %eip's and invalid %esp's. 
603 */ 604#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 605 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 606 ksiginfo_init_trap(&ksi); 607 ksi.ksi_signo = SIGBUS; 608 ksi.ksi_code = BUS_OBJERR; 609 ksi.ksi_trapno = T_PROTFLT; 610 ksi.ksi_addr = (void *)regs->tf_rip; 611 trapsignal(td, &ksi); 612 return(EINVAL); 613 } 614 615 lmask.__bits[0] = frame.sf_sc.sc_mask; 616 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 617 lmask.__bits[i+1] = frame.sf_extramask[i]; 618 linux_to_bsd_sigset(&lmask, &bmask); 619 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 620 621 /* 622 * Restore signal context. 623 */ 624 regs->tf_rdi = frame.sf_sc.sc_edi; 625 regs->tf_rsi = frame.sf_sc.sc_esi; 626 regs->tf_rbp = frame.sf_sc.sc_ebp; 627 regs->tf_rbx = frame.sf_sc.sc_ebx; 628 regs->tf_rdx = frame.sf_sc.sc_edx; 629 regs->tf_rcx = frame.sf_sc.sc_ecx; 630 regs->tf_rax = frame.sf_sc.sc_eax; 631 regs->tf_rip = frame.sf_sc.sc_eip; 632 regs->tf_cs = frame.sf_sc.sc_cs; 633 regs->tf_ds = frame.sf_sc.sc_ds; 634 regs->tf_es = frame.sf_sc.sc_es; 635 regs->tf_fs = frame.sf_sc.sc_fs; 636 regs->tf_gs = frame.sf_sc.sc_gs; 637 regs->tf_rflags = eflags; 638 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 639 regs->tf_ss = frame.sf_sc.sc_ss; 640 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 641 642 return (EJUSTRETURN); 643} 644 645/* 646 * System call to cleanup state after a signal 647 * has been taken. Reset signal mask and 648 * stack state from context left by rt_sendsig (above). 649 * Return to previous pc and psl as specified by 650 * context left by sendsig. Check carefully to 651 * make sure that the user has not modified the 652 * psl to gain improper privileges or to cause 653 * a machine fault. 
654 */ 655int 656linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 657{ 658 struct l_ucontext uc; 659 struct l_sigcontext *context; 660 sigset_t bmask; 661 l_stack_t *lss; 662 stack_t ss; 663 struct trapframe *regs; 664 int eflags; 665 ksiginfo_t ksi; 666 667 regs = td->td_frame; 668 669#ifdef DEBUG 670 if (ldebug(rt_sigreturn)) 671 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 672#endif 673 /* 674 * The trampoline code hands us the ucontext. 675 * It is unsafe to keep track of it ourselves, in the event that a 676 * program jumps out of a signal handler. 677 */ 678 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 679 return (EFAULT); 680 681 context = &uc.uc_mcontext; 682 683 /* 684 * Check for security violations. 685 */ 686#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 687 eflags = context->sc_eflags; 688 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 689 return(EINVAL); 690 691 /* 692 * Don't allow users to load a valid privileged %cs. Let the 693 * hardware check for invalid selectors, excess privilege in 694 * other selectors, invalid %eip's and invalid %esp's. 
695 */ 696#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 697 if (!CS_SECURE(context->sc_cs)) { 698 ksiginfo_init_trap(&ksi); 699 ksi.ksi_signo = SIGBUS; 700 ksi.ksi_code = BUS_OBJERR; 701 ksi.ksi_trapno = T_PROTFLT; 702 ksi.ksi_addr = (void *)regs->tf_rip; 703 trapsignal(td, &ksi); 704 return(EINVAL); 705 } 706 707 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 708 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 709 710 /* 711 * Restore signal context 712 */ 713 regs->tf_gs = context->sc_gs; 714 regs->tf_fs = context->sc_fs; 715 regs->tf_es = context->sc_es; 716 regs->tf_ds = context->sc_ds; 717 regs->tf_rdi = context->sc_edi; 718 regs->tf_rsi = context->sc_esi; 719 regs->tf_rbp = context->sc_ebp; 720 regs->tf_rbx = context->sc_ebx; 721 regs->tf_rdx = context->sc_edx; 722 regs->tf_rcx = context->sc_ecx; 723 regs->tf_rax = context->sc_eax; 724 regs->tf_rip = context->sc_eip; 725 regs->tf_cs = context->sc_cs; 726 regs->tf_rflags = eflags; 727 regs->tf_rsp = context->sc_esp_at_signal; 728 regs->tf_ss = context->sc_ss; 729 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 730 731 /* 732 * call sigaltstack & ignore results.. 
733 */ 734 lss = &uc.uc_stack; 735 ss.ss_sp = PTRIN(lss->ss_sp); 736 ss.ss_size = lss->ss_size; 737 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 738 739#ifdef DEBUG 740 if (ldebug(rt_sigreturn)) 741 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 742 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 743#endif 744 (void)kern_sigaltstack(td, &ss, NULL); 745 746 return (EJUSTRETURN); 747} 748 749static int 750linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 751{ 752 struct proc *p; 753 struct trapframe *frame; 754 755 p = td->td_proc; 756 frame = td->td_frame; 757 758 sa->args[0] = frame->tf_rbx; 759 sa->args[1] = frame->tf_rcx; 760 sa->args[2] = frame->tf_rdx; 761 sa->args[3] = frame->tf_rsi; 762 sa->args[4] = frame->tf_rdi; 763 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 764 sa->code = frame->tf_rax; 765 766 if (sa->code >= p->p_sysent->sv_size) 767 sa->callp = &p->p_sysent->sv_table[0]; 768 else 769 sa->callp = &p->p_sysent->sv_table[sa->code]; 770 sa->narg = sa->callp->sy_narg; 771 772 td->td_retval[0] = 0; 773 td->td_retval[1] = frame->tf_rdx; 774 775 return (0); 776} 777 778/* 779 * If a linux binary is exec'ing something, try this image activator 780 * first. We override standard shell script execution in order to 781 * be able to modify the interpreter path. We only do this if a linux 782 * binary is doing the exec, so we do not create an EXEC module for it. 783 */ 784static int exec_linux_imgact_try(struct image_params *iparams); 785 786static int 787exec_linux_imgact_try(struct image_params *imgp) 788{ 789 const char *head = (const char *)imgp->image_header; 790 char *rpath; 791 int error = -1; 792 793 /* 794 * The interpreter for shell scripts run from a linux binary needs 795 * to be located in /compat/linux if possible in order to recursively 796 * maintain linux path emulation. 797 */ 798 if (((const short *)head)[0] == SHELLMAGIC) { 799 /* 800 * Run our normal shell image activator. 
If it succeeds attempt 801 * to use the alternate path for the interpreter. If an 802 * alternate * path is found, use our stringspace to store it. 803 */ 804 if ((error = exec_shell_imgact(imgp)) == 0) { 805 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 806 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 807 AT_FDCWD); 808 if (rpath != NULL) 809 imgp->args->fname_buf = 810 imgp->interpreter_name = rpath; 811 } 812 } 813 return (error); 814} 815 816/* 817 * Clear registers on exec 818 * XXX copied from ia32_signal.c. 819 */ 820static void 821exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 822{ 823 struct trapframe *regs = td->td_frame; 824 struct pcb *pcb = td->td_pcb; 825 826 mtx_lock(&dt_lock); 827 if (td->td_proc->p_md.md_ldt != NULL) 828 user_ldt_free(td); 829 else 830 mtx_unlock(&dt_lock); 831 832 critical_enter(); 833 wrmsr(MSR_FSBASE, 0); 834 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 835 pcb->pcb_fsbase = 0; 836 pcb->pcb_gsbase = 0; 837 critical_exit(); 838 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 839 840 bzero((char *)regs, sizeof(struct trapframe)); 841 regs->tf_rip = imgp->entry_addr; 842 regs->tf_rsp = stack; 843 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 844 regs->tf_gs = _ugssel; 845 regs->tf_fs = _ufssel; 846 regs->tf_es = _udatasel; 847 regs->tf_ds = _udatasel; 848 regs->tf_ss = _udatasel; 849 regs->tf_flags = TF_HASSEGS; 850 regs->tf_cs = _ucode32sel; 851 regs->tf_rbx = imgp->ps_strings; 852 853 fpstate_drop(td); 854 855 /* Do full restore on return so that we can change to a different %cs */ 856 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 857 td->td_retval[1] = 0; 858} 859 860/* 861 * XXX copied from ia32_sysvec.c. 
862 */ 863static register_t * 864linux_copyout_strings(struct image_params *imgp) 865{ 866 int argc, envc; 867 u_int32_t *vectp; 868 char *stringp, *destp; 869 u_int32_t *stack_base; 870 struct linux32_ps_strings *arginfo; 871 char canary[LINUX_AT_RANDOM_LEN]; 872 size_t execpath_len; 873 874 /* 875 * Calculate string base and vector table pointers. 876 */ 877 if (imgp->execpath != NULL && imgp->auxargs != NULL) 878 execpath_len = strlen(imgp->execpath) + 1; 879 else 880 execpath_len = 0; 881 882 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 883 destp = (caddr_t)arginfo - SPARE_USRSPACE - 884 roundup(sizeof(canary), sizeof(char *)) - 885 roundup(execpath_len, sizeof(char *)) - 886 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 887 888 if (execpath_len != 0) { 889 imgp->execpathp = (uintptr_t)arginfo - execpath_len; 890 copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); 891 } 892 893 /* 894 * Prepare the canary for SSP. 895 */ 896 arc4rand(canary, sizeof(canary), 0); 897 imgp->canary = (uintptr_t)arginfo - 898 roundup(execpath_len, sizeof(char *)) - 899 roundup(sizeof(canary), sizeof(char *)); 900 copyout(canary, (void *)imgp->canary, sizeof(canary)); 901 902 /* 903 * If we have a valid auxargs ptr, prepare some room 904 * on the stack. 905 */ 906 if (imgp->auxargs) { 907 /* 908 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 909 * lower compatibility. 910 */ 911 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 912 (LINUX_AT_COUNT * 2); 913 /* 914 * The '+ 2' is for the null pointers at the end of each of 915 * the arg and env vector sets,and imgp->auxarg_size is room 916 * for argument of Runtime loader. 
917 */ 918 vectp = (u_int32_t *) (destp - (imgp->args->argc + 919 imgp->args->envc + 2 + imgp->auxarg_size) * 920 sizeof(u_int32_t)); 921 922 } else 923 /* 924 * The '+ 2' is for the null pointers at the end of each of 925 * the arg and env vector sets 926 */ 927 vectp = (u_int32_t *)(destp - (imgp->args->argc + 928 imgp->args->envc + 2) * sizeof(u_int32_t)); 929 930 /* 931 * vectp also becomes our initial stack base 932 */ 933 stack_base = vectp; 934 935 stringp = imgp->args->begin_argv; 936 argc = imgp->args->argc; 937 envc = imgp->args->envc; 938 /* 939 * Copy out strings - arguments and environment. 940 */ 941 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 942 943 /* 944 * Fill in "ps_strings" struct for ps, w, etc. 945 */ 946 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 947 suword32(&arginfo->ps_nargvstr, argc); 948 949 /* 950 * Fill in argument portion of vector table. 951 */ 952 for (; argc > 0; --argc) { 953 suword32(vectp++, (uint32_t)(intptr_t)destp); 954 while (*stringp++ != 0) 955 destp++; 956 destp++; 957 } 958 959 /* a null vector table pointer separates the argp's from the envp's */ 960 suword32(vectp++, 0); 961 962 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 963 suword32(&arginfo->ps_nenvstr, envc); 964 965 /* 966 * Fill in environment portion of vector table. 
967 */ 968 for (; envc > 0; --envc) { 969 suword32(vectp++, (uint32_t)(intptr_t)destp); 970 while (*stringp++ != 0) 971 destp++; 972 destp++; 973 } 974 975 /* end of vector table is a null pointer */ 976 suword32(vectp, 0); 977 978 return ((register_t *)stack_base); 979} 980 981static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 982 "32-bit Linux emulation"); 983 984static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 985SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 986 &linux32_maxdsiz, 0, ""); 987static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 988SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 989 &linux32_maxssiz, 0, ""); 990static u_long linux32_maxvmem = LINUX32_MAXVMEM; 991SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 992 &linux32_maxvmem, 0, ""); 993 994#if defined(DEBUG) 995SYSCTL_PROC(_compat_linux32, OID_AUTO, debug, 996 CTLTYPE_STRING | CTLFLAG_RW, 997 0, 0, linux_sysctl_debug, "A", 998 "Linux debugging control"); 999#endif 1000 1001static void 1002linux32_fixlimit(struct rlimit *rl, int which) 1003{ 1004 1005 switch (which) { 1006 case RLIMIT_DATA: 1007 if (linux32_maxdsiz != 0) { 1008 if (rl->rlim_cur > linux32_maxdsiz) 1009 rl->rlim_cur = linux32_maxdsiz; 1010 if (rl->rlim_max > linux32_maxdsiz) 1011 rl->rlim_max = linux32_maxdsiz; 1012 } 1013 break; 1014 case RLIMIT_STACK: 1015 if (linux32_maxssiz != 0) { 1016 if (rl->rlim_cur > linux32_maxssiz) 1017 rl->rlim_cur = linux32_maxssiz; 1018 if (rl->rlim_max > linux32_maxssiz) 1019 rl->rlim_max = linux32_maxssiz; 1020 } 1021 break; 1022 case RLIMIT_VMEM: 1023 if (linux32_maxvmem != 0) { 1024 if (rl->rlim_cur > linux32_maxvmem) 1025 rl->rlim_cur = linux32_maxvmem; 1026 if (rl->rlim_max > linux32_maxvmem) 1027 rl->rlim_max = linux32_maxvmem; 1028 } 1029 break; 1030 } 1031} 1032 1033struct sysentvec elf_linux_sysvec = { 1034 .sv_size = LINUX_SYS_MAXSYSCALL, 1035 .sv_table = linux_sysent, 1036 .sv_mask = 0, 1037 .sv_sigsize = LINUX_SIGTBLSZ, 1038 .sv_sigtbl = 
bsd_to_linux_signal, 1039 .sv_errsize = ELAST + 1, 1040 .sv_errtbl = bsd_to_linux_errno, 1041 .sv_transtrap = translate_traps, 1042 .sv_fixup = elf_linux_fixup, 1043 .sv_sendsig = linux_sendsig, 1044 .sv_sigcode = &_binary_linux32_locore_o_start, 1045 .sv_szsigcode = &linux_szsigcode, 1046 .sv_prepsyscall = NULL, 1047 .sv_name = "Linux ELF32", 1048 .sv_coredump = elf32_coredump, 1049 .sv_imgact_try = exec_linux_imgact_try, 1050 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1051 .sv_pagesize = PAGE_SIZE, 1052 .sv_minuser = VM_MIN_ADDRESS, 1053 .sv_maxuser = LINUX32_MAXUSER, 1054 .sv_usrstack = LINUX32_USRSTACK, 1055 .sv_psstrings = LINUX32_PS_STRINGS, 1056 .sv_stackprot = VM_PROT_ALL, 1057 .sv_copyout_strings = linux_copyout_strings, 1058 .sv_setregs = exec_linux_setregs, 1059 .sv_fixlimit = linux32_fixlimit, 1060 .sv_maxssiz = &linux32_maxssiz, 1061 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1062 .sv_set_syscall_retval = cpu_set_syscall_retval, 1063 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1064 .sv_syscallnames = NULL, 1065 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1066 .sv_shared_page_len = PAGE_SIZE, 1067 .sv_schedtail = linux_schedtail, 1068 .sv_thread_detach = linux_thread_detach, 1069}; 1070 1071static void 1072linux_vdso_install(void *param) 1073{ 1074 1075 linux_szsigcode = (&_binary_linux32_locore_o_end - 1076 &_binary_linux32_locore_o_start); 1077 1078 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1079 panic("Linux invalid vdso size\n"); 1080 1081 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1082 1083 linux_shared_page_obj = __elfN(linux_shared_page_init) 1084 (&linux_shared_page_mapping); 1085 1086 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); 1087 1088 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1089 linux_szsigcode); 1090 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1091 1092 linux_kplatform = linux_shared_page_mapping + 1093 (linux_platform - 
(caddr_t)LINUX32_SHAREDPAGE); 1094} 1095SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1096 (sysinit_cfunc_t)linux_vdso_install, NULL); 1097 1098static void 1099linux_vdso_deinstall(void *param) 1100{ 1101 1102 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1103}; 1104SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1105 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1106 1107static char GNU_ABI_VENDOR[] = "GNU"; 1108static int GNULINUX_ABI_DESC = 0; 1109 1110static boolean_t 1111linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1112{ 1113 const Elf32_Word *desc; 1114 uintptr_t p; 1115 1116 p = (uintptr_t)(note + 1); 1117 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1118 1119 desc = (const Elf32_Word *)p; 1120 if (desc[0] != GNULINUX_ABI_DESC) 1121 return (FALSE); 1122 1123 /* 1124 * For linux we encode osrel as follows (see linux_mib.c): 1125 * VVVMMMIII (version, major, minor), see linux_mib.c. 1126 */ 1127 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1128 1129 return (TRUE); 1130} 1131 1132static Elf_Brandnote linux32_brandnote = { 1133 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1134 .hdr.n_descsz = 16, /* XXX at least 16 */ 1135 .hdr.n_type = 1, 1136 .vendor = GNU_ABI_VENDOR, 1137 .flags = BN_TRANSLATE_OSREL, 1138 .trans_osrel = linux32_trans_osrel 1139}; 1140 1141static Elf32_Brandinfo linux_brand = { 1142 .brand = ELFOSABI_LINUX, 1143 .machine = EM_386, 1144 .compat_3_brand = "Linux", 1145 .emul_path = "/compat/linux", 1146 .interp_path = "/lib/ld-linux.so.1", 1147 .sysvec = &elf_linux_sysvec, 1148 .interp_newpath = NULL, 1149 .brand_note = &linux32_brandnote, 1150 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1151}; 1152 1153static Elf32_Brandinfo linux_glibc2brand = { 1154 .brand = ELFOSABI_LINUX, 1155 .machine = EM_386, 1156 .compat_3_brand = "Linux", 1157 .emul_path = "/compat/linux", 1158 .interp_path = "/lib/ld-linux.so.2", 1159 .sysvec = &elf_linux_sysvec, 1160 .interp_newpath = NULL, 1161 .brand_note = 
&linux32_brandnote, 1162 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1163}; 1164 1165Elf32_Brandinfo *linux_brandlist[] = { 1166 &linux_brand, 1167 &linux_glibc2brand, 1168 NULL 1169}; 1170 1171static int 1172linux_elf_modevent(module_t mod, int type, void *data) 1173{ 1174 Elf32_Brandinfo **brandinfo; 1175 int error; 1176 struct linux_ioctl_handler **lihp; 1177 1178 error = 0; 1179 1180 switch(type) { 1181 case MOD_LOAD: 1182 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1183 ++brandinfo) 1184 if (elf32_insert_brand_entry(*brandinfo) < 0) 1185 error = EINVAL; 1186 if (error == 0) { 1187 SET_FOREACH(lihp, linux_ioctl_handler_set) 1188 linux_ioctl_register_handler(*lihp); 1189 LIST_INIT(&futex_list); 1190 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1191 stclohz = (stathz ? stathz : hz); 1192 if (bootverbose) 1193 printf("Linux ELF exec handler installed\n"); 1194 } else 1195 printf("cannot insert Linux ELF brand handler\n"); 1196 break; 1197 case MOD_UNLOAD: 1198 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1199 ++brandinfo) 1200 if (elf32_brand_inuse(*brandinfo)) 1201 error = EBUSY; 1202 if (error == 0) { 1203 for (brandinfo = &linux_brandlist[0]; 1204 *brandinfo != NULL; ++brandinfo) 1205 if (elf32_remove_brand_entry(*brandinfo) < 0) 1206 error = EINVAL; 1207 } 1208 if (error == 0) { 1209 SET_FOREACH(lihp, linux_ioctl_handler_set) 1210 linux_ioctl_unregister_handler(*lihp); 1211 mtx_destroy(&futex_mtx); 1212 if (bootverbose) 1213 printf("Linux ELF exec handler removed\n"); 1214 } else 1215 printf("Could not deinstall ELF interpreter entry\n"); 1216 break; 1217 default: 1218 return (EOPNOTSUPP); 1219 } 1220 return (error); 1221} 1222 1223static moduledata_t linux_elf_mod = { 1224 "linuxelf", 1225 linux_elf_modevent, 1226 0 1227}; 1228 1229DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1230MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); 1231