linux_sysvec.c revision 59368
1/*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD: head/sys/i386/linux/linux_sysvec.c 59368 2000-04-18 15:15:39Z phk $ 29 */ 30 31/* XXX we use functions that might not exist. */ 32#include "opt_compat.h" 33 34#ifndef COMPAT_43 35#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 36#endif 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/proc.h> 41#include <sys/sysent.h> 42#include <sys/imgact.h> 43#include <sys/imgact_aout.h> 44#include <sys/imgact_elf.h> 45#include <sys/signalvar.h> 46#include <sys/malloc.h> 47#include <vm/vm.h> 48#include <vm/vm_param.h> 49#include <vm/vm_page.h> 50#include <vm/vm_extern.h> 51#include <sys/exec.h> 52#include <sys/kernel.h> 53#include <sys/module.h> 54#include <machine/cpu.h> 55 56#include <i386/linux/linux.h> 57#include <i386/linux/linux_proto.h> 58 59MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 60 61extern char linux_sigcode[]; 62extern int linux_szsigcode; 63 64extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 65 66extern struct linker_set linux_ioctl_handler_set; 67 68static int linux_fixup __P((register_t **stack_base, 69 struct image_params *iparams)); 70static int elf_linux_fixup __P((register_t **stack_base, 71 struct image_params *iparams)); 72static void linux_prepsyscall __P((struct trapframe *tf, int *args, 73 u_int *code, caddr_t *params)); 74static void linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask, 75 u_long code)); 76 77/* 78 * Linux syscalls return negative errno's, we do positive and map them 79 */ 80static int bsd_to_linux_errno[ELAST + 1] = { 81 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 82 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 83 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 84 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 85 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 86 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 87 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 88 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 89 -6, -6, -43, -42, -75, -6, -84 90}; 91 92int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 93 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 94 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 95 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0, 96 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 97 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 98 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 99 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 100 0, LINUX_SIGUSR1, LINUX_SIGUSR2 101}; 102 103int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 104 SIGHUP, SIGINT, SIGQUIT, SIGILL, 105 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 106 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 107 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 108 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 109 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 110 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 111 SIGIO, SIGURG, 0 112}; 113 114/* 115 * If FreeBSD & Linux have a difference of opinion about what a trap 116 * means, deal with it here. 117 */ 118static int 119translate_traps(int signal, int trap_code) 120{ 121 if (signal != SIGBUS) 122 return signal; 123 switch (trap_code) { 124 case T_PROTFLT: 125 case T_TSSFLT: 126 case T_DOUBLEFLT: 127 case T_PAGEFLT: 128 return SIGSEGV; 129 default: 130 return signal; 131 } 132} 133 134static int 135linux_fixup(register_t **stack_base, struct image_params *imgp) 136{ 137 register_t *argv, *envp; 138 139 argv = *stack_base; 140 envp = *stack_base + (imgp->argc + 1); 141 (*stack_base)--; 142 **stack_base = (intptr_t)(void *)envp; 143 (*stack_base)--; 144 **stack_base = (intptr_t)(void *)argv; 145 (*stack_base)--; 146 **stack_base = imgp->argc; 147 return 0; 148} 149 150static int 151elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 152{ 153 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs; 154 register_t *pos; 155 156 pos = *stack_base + (imgp->argc + imgp->envc + 2); 157 158 if (args->trace) { 159 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 160 } 161 if (args->execfd != -1) { 162 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 163 } 164 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 165 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 166 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 167 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 168 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 169 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 170 AUXARGS_ENTRY(pos, AT_BASE, args->base); 171 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid); 172 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid); 173 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid); 174 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid); 175 AUXARGS_ENTRY(pos, AT_NULL, 0); 176 177 free(imgp->auxargs, M_TEMP); 178 imgp->auxargs = NULL; 179 180 (*stack_base)--; 181 **stack_base = (long)imgp->argc; 182 return 0; 183} 184 185extern int _ucodesel, _udatasel; 186 187/* 188 * Send an interrupt to process. 189 * 190 * Stack is set up to allow sigcode stored 191 * in u. to call routine, followed by kcall 192 * to sigreturn routine below. After sigreturn 193 * resets the signal mask, the stack, and the 194 * frame pointer, it returns to the user 195 * specified pc, psl. 196 */ 197 198static void 199linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 200{ 201 register struct proc *p = curproc; 202 register struct trapframe *regs; 203 struct linux_sigframe *fp, frame; 204 struct sigacts *psp = p->p_sigacts; 205 int oonstack; 206 207 regs = p->p_md.md_regs; 208 oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; 209 210#ifdef DEBUG 211 printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n", 212 (long)p->p_pid, catcher, sig, (void*)mask, code); 213#endif 214 /* 215 * Allocate space for the signal handler context. 216 */ 217 if ((p->p_flag & P_ALTSTACK) && !oonstack && 218 SIGISMEMBER(psp->ps_sigonstack, sig)) { 219 fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp + 220 p->p_sigstk.ss_size - sizeof(struct linux_sigframe)); 221 p->p_sigstk.ss_flags |= SS_ONSTACK; 222 } else { 223 fp = (struct linux_sigframe *)regs->tf_esp - 1; 224 } 225 226 /* 227 * grow() will return FALSE if the fp will not fit inside the stack 228 * and the stack can not be grown. useracc will return FALSE 229 * if access is denied. 230 */ 231 if ((grow_stack (p, (int)fp) == FALSE) || 232 !useracc((caddr_t)fp, sizeof (struct linux_sigframe), 233 VM_PROT_WRITE)) { 234 /* 235 * Process has trashed its stack; give it an illegal 236 * instruction to halt it in its tracks. 237 */ 238 SIGACTION(p, SIGILL) = SIG_DFL; 239 SIGDELSET(p->p_sigignore, SIGILL); 240 SIGDELSET(p->p_sigcatch, SIGILL); 241 SIGDELSET(p->p_sigmask, SIGILL); 242 psignal(p, SIGILL); 243 return; 244 } 245 246 /* 247 * Build the argument list for the signal handler. 248 */ 249 if (p->p_sysent->sv_sigtbl) 250 if (sig <= p->p_sysent->sv_sigsize) 251 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 252 253 frame.sf_handler = catcher; 254 frame.sf_sig = sig; 255 256 /* 257 * Build the signal context to be used by sigreturn. 258 */ 259 frame.sf_sc.sc_mask = mask->__bits[0]; 260 frame.sf_sc.sc_gs = rgs(); 261 frame.sf_sc.sc_fs = regs->tf_fs; 262 frame.sf_sc.sc_es = regs->tf_es; 263 frame.sf_sc.sc_ds = regs->tf_ds; 264 frame.sf_sc.sc_edi = regs->tf_edi; 265 frame.sf_sc.sc_esi = regs->tf_esi; 266 frame.sf_sc.sc_ebp = regs->tf_ebp; 267 frame.sf_sc.sc_ebx = regs->tf_ebx; 268 frame.sf_sc.sc_edx = regs->tf_edx; 269 frame.sf_sc.sc_ecx = regs->tf_ecx; 270 frame.sf_sc.sc_eax = regs->tf_eax; 271 frame.sf_sc.sc_eip = regs->tf_eip; 272 frame.sf_sc.sc_cs = regs->tf_cs; 273 frame.sf_sc.sc_eflags = regs->tf_eflags; 274 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 275 frame.sf_sc.sc_ss = regs->tf_ss; 276 frame.sf_sc.sc_err = regs->tf_err; 277 frame.sf_sc.sc_trapno = code; /* XXX ???? */ 278 279 if (copyout(&frame, fp, sizeof(frame)) != 0) { 280 /* 281 * Process has trashed its stack; give it an illegal 282 * instruction to halt it in its tracks. 283 */ 284 sigexit(p, SIGILL); 285 /* NOTREACHED */ 286 } 287 288 /* 289 * Build context to run handler in. 290 */ 291 regs->tf_esp = (int)fp; 292 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 293 regs->tf_eflags &= ~PSL_VM; 294 regs->tf_cs = _ucodesel; 295 regs->tf_ds = _udatasel; 296 regs->tf_es = _udatasel; 297 regs->tf_fs = _udatasel; 298 load_gs(_udatasel); 299 regs->tf_ss = _udatasel; 300} 301 302/* 303 * System call to cleanup state after a signal 304 * has been taken. Reset signal mask and 305 * stack state from context left by sendsig (above). 306 * Return to previous pc and psl as specified by 307 * context left by sendsig. Check carefully to 308 * make sure that the user has not modified the 309 * psl to gain improper privileges or to cause 310 * a machine fault. 311 */ 312int 313linux_sigreturn(p, args) 314 struct proc *p; 315 struct linux_sigreturn_args *args; 316{ 317 struct linux_sigcontext *scp, context; 318 register struct trapframe *regs; 319 int eflags; 320 321 regs = p->p_md.md_regs; 322 323#ifdef DEBUG 324 printf("Linux-emul(%ld): linux_sigreturn(%p)\n", 325 (long)p->p_pid, (void *)args->scp); 326#endif 327 /* 328 * The trampoline code hands us the context. 329 * It is unsafe to keep track of it ourselves, in the event that a 330 * program jumps out of a signal handler. 331 */ 332 scp = SCARG(args,scp); 333 if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0) 334 return (EFAULT); 335 336 /* 337 * Check for security violations. 338 */ 339#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 340 eflags = context.sc_eflags; 341 /* 342 * XXX do allow users to change the privileged flag PSL_RF. The 343 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 344 * sometimes set it there too. tf_eflags is kept in the signal 345 * context during signal handling and there is no other place 346 * to remember it, so the PSL_RF bit may be corrupted by the 347 * signal handler without us knowing. Corruption of the PSL_RF 348 * bit at worst causes one more or one less debugger trap, so 349 * allowing it is fairly harmless. 350 */ 351 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { 352 return(EINVAL); 353 } 354 355 /* 356 * Don't allow users to load a valid privileged %cs. Let the 357 * hardware check for invalid selectors, excess privilege in 358 * other selectors, invalid %eip's and invalid %esp's. 359 */ 360#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 361 if (!CS_SECURE(context.sc_cs)) { 362 trapsignal(p, SIGBUS, T_PROTFLT); 363 return(EINVAL); 364 } 365 366 p->p_sigstk.ss_flags &= ~SS_ONSTACK; 367 SIGSETOLD(p->p_sigmask, context.sc_mask); 368 SIG_CANTMASK(p->p_sigmask); 369 370 /* 371 * Restore signal context. 372 */ 373 /* %gs was restored by the trampoline. */ 374 regs->tf_fs = context.sc_fs; 375 regs->tf_es = context.sc_es; 376 regs->tf_ds = context.sc_ds; 377 regs->tf_edi = context.sc_edi; 378 regs->tf_esi = context.sc_esi; 379 regs->tf_ebp = context.sc_ebp; 380 regs->tf_ebx = context.sc_ebx; 381 regs->tf_edx = context.sc_edx; 382 regs->tf_ecx = context.sc_ecx; 383 regs->tf_eax = context.sc_eax; 384 regs->tf_eip = context.sc_eip; 385 regs->tf_cs = context.sc_cs; 386 regs->tf_eflags = eflags; 387 regs->tf_esp = context.sc_esp_at_signal; 388 regs->tf_ss = context.sc_ss; 389 390 return (EJUSTRETURN); 391} 392 393static void 394linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 395{ 396 args[0] = tf->tf_ebx; 397 args[1] = tf->tf_ecx; 398 args[2] = tf->tf_edx; 399 args[3] = tf->tf_esi; 400 args[4] = tf->tf_edi; 401 *params = NULL; /* no copyin */ 402} 403 404struct sysentvec linux_sysvec = { 405 LINUX_SYS_MAXSYSCALL, 406 linux_sysent, 407 0xff, 408 LINUX_SIGTBLSZ, 409 bsd_to_linux_signal, 410 ELAST + 1, 411 bsd_to_linux_errno, 412 translate_traps, 413 linux_fixup, 414 linux_sendsig, 415 linux_sigcode, 416 &linux_szsigcode, 417 linux_prepsyscall, 418 "Linux a.out", 419 aout_coredump 420}; 421 422struct sysentvec elf_linux_sysvec = { 423 LINUX_SYS_MAXSYSCALL, 424 linux_sysent, 425 0xff, 426 LINUX_SIGTBLSZ, 427 bsd_to_linux_signal, 428 ELAST + 1, 429 bsd_to_linux_errno, 430 translate_traps, 431 elf_linux_fixup, 432 linux_sendsig, 433 linux_sigcode, 434 &linux_szsigcode, 435 linux_prepsyscall, 436 "Linux ELF", 437 elf_coredump 438}; 439 440static Elf32_Brandinfo linux_brand = { 441 ELFOSABI_LINUX, 442 "/compat/linux", 443 "/lib/ld-linux.so.1", 444 &elf_linux_sysvec 445 }; 446 447static Elf32_Brandinfo linux_glibc2brand = { 448 ELFOSABI_LINUX, 449 "/compat/linux", 450 "/lib/ld-linux.so.2", 451 &elf_linux_sysvec 452 }; 453 454Elf32_Brandinfo *linux_brandlist[] = { 455 &linux_brand, 456 &linux_glibc2brand, 457 NULL 458 }; 459 460static int 461linux_elf_modevent(module_t mod, int type, void *data) 462{ 463 Elf32_Brandinfo **brandinfo; 464 int error; 465 466 error = 0; 467 468 switch(type) { 469 case MOD_LOAD: 470 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 471 ++brandinfo) 472 if (elf_insert_brand_entry(*brandinfo) < 0) 473 error = EINVAL; 474 if (error) 475 printf("cannot insert Linux elf brand handler\n"); 476 else { 477 linux_ioctl_register_handlers(&linux_ioctl_handler_set); 478 if (bootverbose) 479 printf("Linux-ELF exec handler installed\n"); 480 } 481 break; 482 case MOD_UNLOAD: 483 linux_ioctl_unregister_handlers(&linux_ioctl_handler_set); 484 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 485 ++brandinfo) 486 if (elf_brand_inuse(*brandinfo)) 487 error = EBUSY; 488 489 if (error == 0) { 490 for (brandinfo = &linux_brandlist[0]; 491 *brandinfo != NULL; ++brandinfo) 492 if (elf_remove_brand_entry(*brandinfo) < 0) 493 error = EINVAL; 494 } 495 if (error) 496 printf("Could not deinstall ELF interpreter entry\n"); 497 else if (bootverbose) 498 printf("Linux-elf exec handler removed\n"); 499 break; 500 default: 501 break; 502 } 503 return error; 504} 505static moduledata_t linux_elf_mod = { 506 "linuxelf", 507 linux_elf_modevent, 508 0 509}; 510DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 511