1/* $NetBSD: linux_machdep.c,v 1.150 2011/03/04 22:25:31 joerg Exp $ */ 2 3/*- 4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.150 2011/03/04 22:25:31 joerg Exp $"); 34 35#if defined(_KERNEL_OPT) 36#include "opt_vm86.h" 37#include "opt_user_ldt.h" 38#endif 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/signalvar.h> 43#include <sys/kernel.h> 44#include <sys/proc.h> 45#include <sys/buf.h> 46#include <sys/reboot.h> 47#include <sys/conf.h> 48#include <sys/exec.h> 49#include <sys/file.h> 50#include <sys/callout.h> 51#include <sys/malloc.h> 52#include <sys/mbuf.h> 53#include <sys/msgbuf.h> 54#include <sys/mount.h> 55#include <sys/vnode.h> 56#include <sys/device.h> 57#include <sys/syscallargs.h> 58#include <sys/filedesc.h> 59#include <sys/exec_elf.h> 60#include <sys/disklabel.h> 61#include <sys/ioctl.h> 62#include <sys/wait.h> 63#include <sys/kauth.h> 64#include <sys/kmem.h> 65 66#include <miscfs/specfs/specdev.h> 67 68#include <compat/linux/common/linux_types.h> 69#include <compat/linux/common/linux_signal.h> 70#include <compat/linux/common/linux_util.h> 71#include <compat/linux/common/linux_ioctl.h> 72#include <compat/linux/common/linux_hdio.h> 73#include <compat/linux/common/linux_exec.h> 74#include <compat/linux/common/linux_machdep.h> 75#include <compat/linux/common/linux_errno.h> 76 77#include <compat/linux/linux_syscallargs.h> 78 79#include <sys/cpu.h> 80#include <machine/cpufunc.h> 81#include <machine/psl.h> 82#include <machine/reg.h> 83#include <machine/segments.h> 84#include <machine/specialreg.h> 85#include <machine/sysarch.h> 86#include <machine/vm86.h> 87#include <machine/vmparam.h> 88 89/* 90 * To see whether wscons is configured (for virtual console ioctl calls). 91 */ 92#if defined(_KERNEL_OPT) 93#include "wsdisplay.h" 94#endif 95#if (NWSDISPLAY > 0) 96#include <dev/wscons/wsconsio.h> 97#include <dev/wscons/wsdisplay_usl_io.h> 98#if defined(_KERNEL_OPT) 99#include "opt_xserver.h" 100#endif 101#endif 102 103#ifdef DEBUG_LINUX 104#define DPRINTF(a) uprintf a 105#else 106#define DPRINTF(a) 107#endif 108 109static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *); 110extern struct disklist *x86_alldisks; 111static void linux_save_ucontext(struct lwp *, struct trapframe *, 112 const sigset_t *, struct sigaltstack *, struct linux_ucontext *); 113static void linux_save_sigcontext(struct lwp *, struct trapframe *, 114 const sigset_t *, struct linux_sigcontext *); 115static int linux_restore_sigcontext(struct lwp *, 116 struct linux_sigcontext *, register_t *); 117static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *); 118static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *); 119 120extern char linux_sigcode[], linux_rt_sigcode[]; 121 122/* 123 * Deal with some i386-specific things in the Linux emulation code. 124 */ 125 126void 127linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack) 128{ 129 struct pcb *pcb = lwp_getpcb(l); 130 struct trapframe *tf; 131 132#if NNPX > 0 133 /* If we were using the FPU, forget about it. */ 134 if (npxproc == l) 135 npxdrop(); 136#endif 137 138#ifdef USER_LDT 139 pmap_ldt_cleanup(l); 140#endif 141 142 l->l_md.md_flags &= ~MDL_USEDFPU; 143 144 if (i386_use_fxsave) { 145 pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__; 146 pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__; 147 } else 148 pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__; 149 150 tf = l->l_md.md_regs; 151 tf->tf_gs = 0; 152 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 153 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 154 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 155 tf->tf_edi = 0; 156 tf->tf_esi = 0; 157 tf->tf_ebp = 0; 158 tf->tf_ebx = l->l_proc->p_psstrp; 159 tf->tf_edx = 0; 160 tf->tf_ecx = 0; 161 tf->tf_eax = 0; 162 tf->tf_eip = epp->ep_entry; 163 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 164 tf->tf_eflags = PSL_USERSET; 165 tf->tf_esp = stack; 166 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 167} 168 169/* 170 * Send an interrupt to process. 171 * 172 * Stack is set up to allow sigcode stored 173 * in u. to call routine, followed by kcall 174 * to sigreturn routine below. After sigreturn 175 * resets the signal mask, the stack, and the 176 * frame pointer, it returns to the user 177 * specified pc, psl. 178 */ 179 180void 181linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 182{ 183 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO) 184 linux_rt_sendsig(ksi, mask); 185 else 186 linux_old_sendsig(ksi, mask); 187} 188 189 190static void 191linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc) 192{ 193 uc->uc_flags = 0; 194 uc->uc_link = NULL; 195 native_to_linux_sigaltstack(&uc->uc_stack, sas); 196 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext); 197 native_to_linux_sigset(&uc->uc_sigmask, mask); 198 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem)); 199} 200 201static void 202linux_save_sigcontext(struct lwp *l, struct trapframe *tf, 203 const sigset_t *mask, struct linux_sigcontext *sc) 204{ 205 struct pcb *pcb = lwp_getpcb(l); 206 207 /* Save register context. */ 208#ifdef VM86 209 if (tf->tf_eflags & PSL_VM) { 210 sc->sc_gs = tf->tf_vm86_gs; 211 sc->sc_fs = tf->tf_vm86_fs; 212 sc->sc_es = tf->tf_vm86_es; 213 sc->sc_ds = tf->tf_vm86_ds; 214 sc->sc_eflags = get_vflags(l); 215 } else 216#endif 217 { 218 sc->sc_gs = tf->tf_gs; 219 sc->sc_fs = tf->tf_fs; 220 sc->sc_es = tf->tf_es; 221 sc->sc_ds = tf->tf_ds; 222 sc->sc_eflags = tf->tf_eflags; 223 } 224 sc->sc_edi = tf->tf_edi; 225 sc->sc_esi = tf->tf_esi; 226 sc->sc_esp = tf->tf_esp; 227 sc->sc_ebp = tf->tf_ebp; 228 sc->sc_ebx = tf->tf_ebx; 229 sc->sc_edx = tf->tf_edx; 230 sc->sc_ecx = tf->tf_ecx; 231 sc->sc_eax = tf->tf_eax; 232 sc->sc_eip = tf->tf_eip; 233 sc->sc_cs = tf->tf_cs; 234 sc->sc_esp_at_signal = tf->tf_esp; 235 sc->sc_ss = tf->tf_ss; 236 sc->sc_err = tf->tf_err; 237 sc->sc_trapno = tf->tf_trapno; 238 sc->sc_cr2 = pcb->pcb_cr2; 239 sc->sc_387 = NULL; 240 241 /* Save signal stack. */ 242 /* Linux doesn't save the onstack flag in sigframe */ 243 244 /* Save signal mask. */ 245 native_to_linux_old_sigset(&sc->sc_mask, mask); 246} 247 248static void 249linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 250{ 251 struct lwp *l = curlwp; 252 struct proc *p = l->l_proc; 253 struct trapframe *tf; 254 struct linux_rt_sigframe *fp, frame; 255 int onstack, error; 256 int sig = ksi->ksi_signo; 257 sig_t catcher = SIGACTION(p, sig).sa_handler; 258 struct sigaltstack *sas = &l->l_sigstk; 259 260 tf = l->l_md.md_regs; 261 /* Do we need to jump onto the signal stack? */ 262 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 263 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 264 265 266 /* Allocate space for the signal handler context. */ 267 if (onstack) 268 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp + 269 sas->ss_size); 270 else 271 fp = (struct linux_rt_sigframe *)tf->tf_esp; 272 fp--; 273 274 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 275 onstack, fp, sig, tf->tf_eip, 276 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 277 278 /* Build stack frame for signal trampoline. */ 279 frame.sf_handler = catcher; 280 frame.sf_sig = native_to_linux_signo[sig]; 281 frame.sf_sip = &fp->sf_si; 282 frame.sf_ucp = &fp->sf_uc; 283 284 /* 285 * XXX: the following code assumes that the constants for 286 * siginfo are the same between linux and NetBSD. 287 */ 288 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info); 289 290 /* Save register context. */ 291 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc); 292 sendsig_reset(l, sig); 293 294 mutex_exit(p->p_lock); 295 error = copyout(&frame, fp, sizeof(frame)); 296 mutex_enter(p->p_lock); 297 298 if (error != 0) { 299 /* 300 * Process has trashed its stack; give it an illegal 301 * instruction to halt it in its tracks. 302 */ 303 sigexit(l, SIGILL); 304 /* NOTREACHED */ 305 } 306 307 /* 308 * Build context to run handler in. 309 */ 310 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 311 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 312 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 313 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) + 314 (linux_rt_sigcode - linux_sigcode); 315 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 316 tf->tf_eflags &= ~PSL_CLEARSIG; 317 tf->tf_esp = (int)fp; 318 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 319 320 /* Remember that we're now on the signal stack. */ 321 if (onstack) 322 sas->ss_flags |= SS_ONSTACK; 323} 324 325static void 326linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 327{ 328 struct lwp *l = curlwp; 329 struct proc *p = l->l_proc; 330 struct trapframe *tf; 331 struct linux_sigframe *fp, frame; 332 int onstack, error; 333 int sig = ksi->ksi_signo; 334 sig_t catcher = SIGACTION(p, sig).sa_handler; 335 struct sigaltstack *sas = &l->l_sigstk; 336 337 tf = l->l_md.md_regs; 338 339 /* Do we need to jump onto the signal stack? */ 340 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 341 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 342 343 /* Allocate space for the signal handler context. */ 344 if (onstack) 345 fp = (struct linux_sigframe *) ((char *)sas->ss_sp + 346 sas->ss_size); 347 else 348 fp = (struct linux_sigframe *)tf->tf_esp; 349 fp--; 350 351 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 352 onstack, fp, sig, tf->tf_eip, 353 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 354 355 /* Build stack frame for signal trampoline. */ 356 frame.sf_handler = catcher; 357 frame.sf_sig = native_to_linux_signo[sig]; 358 359 linux_save_sigcontext(l, tf, mask, &frame.sf_sc); 360 sendsig_reset(l, sig); 361 362 mutex_exit(p->p_lock); 363 error = copyout(&frame, fp, sizeof(frame)); 364 mutex_enter(p->p_lock); 365 366 if (error != 0) { 367 /* 368 * Process has trashed its stack; give it an illegal 369 * instruction to halt it in its tracks. 370 */ 371 sigexit(l, SIGILL); 372 /* NOTREACHED */ 373 } 374 375 /* 376 * Build context to run handler in. 377 */ 378 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 379 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 380 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 381 tf->tf_eip = (int)p->p_sigctx.ps_sigcode; 382 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 383 tf->tf_eflags &= ~PSL_CLEARSIG; 384 tf->tf_esp = (int)fp; 385 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 386 387 /* Remember that we're now on the signal stack. */ 388 if (onstack) 389 sas->ss_flags |= SS_ONSTACK; 390} 391 392/* 393 * System call to cleanup state after a signal 394 * has been taken. Reset signal mask and 395 * stack state from context left by sendsig (above). 396 * Return to previous pc and psl as specified by 397 * context left by sendsig. Check carefully to 398 * make sure that the user has not modified the 399 * psl to gain improper privileges or to cause 400 * a machine fault. 401 */ 402int 403linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval) 404{ 405 /* { 406 syscallarg(struct linux_ucontext *) ucp; 407 } */ 408 struct linux_ucontext context, *ucp = SCARG(uap, ucp); 409 int error; 410 411 /* 412 * The trampoline code hands us the context. 413 * It is unsafe to keep track of it ourselves, in the event that a 414 * program jumps out of a signal handler. 415 */ 416 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0) 417 return error; 418 419 /* XXX XAX we can do better here by using more of the ucontext */ 420 return linux_restore_sigcontext(l, &context.uc_mcontext, retval); 421} 422 423int 424linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval) 425{ 426 /* { 427 syscallarg(struct linux_sigcontext *) scp; 428 } */ 429 struct linux_sigcontext context, *scp = SCARG(uap, scp); 430 int error; 431 432 /* 433 * The trampoline code hands us the context. 434 * It is unsafe to keep track of it ourselves, in the event that a 435 * program jumps out of a signal handler. 436 */ 437 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0) 438 return error; 439 return linux_restore_sigcontext(l, &context, retval); 440} 441 442static int 443linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp, 444 register_t *retval) 445{ 446 struct proc *p = l->l_proc; 447 struct sigaltstack *sas = &l->l_sigstk; 448 struct trapframe *tf; 449 sigset_t mask; 450 ssize_t ss_gap; 451 452 /* Restore register context. */ 453 tf = l->l_md.md_regs; 454 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 455 456#ifdef VM86 457 if (scp->sc_eflags & PSL_VM) { 458 void syscall_vm86(struct trapframe *); 459 460 tf->tf_vm86_gs = scp->sc_gs; 461 tf->tf_vm86_fs = scp->sc_fs; 462 tf->tf_vm86_es = scp->sc_es; 463 tf->tf_vm86_ds = scp->sc_ds; 464 set_vflags(l, scp->sc_eflags); 465 p->p_md.md_syscall = syscall_vm86; 466 } else 467#endif 468 { 469 /* 470 * Check for security violations. If we're returning to 471 * protected mode, the CPU will validate the segment registers 472 * automatically and generate a trap on violations. We handle 473 * the trap, rather than doing all of the checking here. 474 */ 475 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 || 476 !USERMODE(scp->sc_cs, scp->sc_eflags)) 477 return EINVAL; 478 479 tf->tf_gs = scp->sc_gs; 480 tf->tf_fs = scp->sc_fs; 481 tf->tf_es = scp->sc_es; 482 tf->tf_ds = scp->sc_ds; 483#ifdef VM86 484 if (tf->tf_eflags & PSL_VM) 485 (*p->p_emul->e_syscall_intern)(p); 486#endif 487 tf->tf_eflags = scp->sc_eflags; 488 } 489 tf->tf_edi = scp->sc_edi; 490 tf->tf_esi = scp->sc_esi; 491 tf->tf_ebp = scp->sc_ebp; 492 tf->tf_ebx = scp->sc_ebx; 493 tf->tf_edx = scp->sc_edx; 494 tf->tf_ecx = scp->sc_ecx; 495 tf->tf_eax = scp->sc_eax; 496 tf->tf_eip = scp->sc_eip; 497 tf->tf_cs = scp->sc_cs; 498 tf->tf_esp = scp->sc_esp_at_signal; 499 tf->tf_ss = scp->sc_ss; 500 501 /* Restore signal stack. */ 502 /* 503 * Linux really does it this way; it doesn't have space in sigframe 504 * to save the onstack flag. 505 */ 506 mutex_enter(p->p_lock); 507 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp); 508 if (ss_gap >= 0 && ss_gap < sas->ss_size) 509 sas->ss_flags |= SS_ONSTACK; 510 else 511 sas->ss_flags &= ~SS_ONSTACK; 512 513 /* Restore signal mask. */ 514 linux_old_to_native_sigset(&mask, &scp->sc_mask); 515 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 516 mutex_exit(p->p_lock); 517 518 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 519 return EJUSTRETURN; 520} 521 522#ifdef USER_LDT 523 524static int 525linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 526 register_t *retval) 527{ 528 struct x86_get_ldt_args gl; 529 int error; 530 union descriptor *ldt_buf; 531 size_t sz; 532 533 /* 534 * I've checked the linux code - this function is asymetric with 535 * linux_write_ldt, and returns raw ldt entries. 536 * NB, the code I saw zerod the spare parts of the user buffer. 537 */ 538 539 DPRINTF(("linux_read_ldt!")); 540 541 sz = 8192 * sizeof(*ldt_buf); 542 ldt_buf = kmem_zalloc(sz, KM_SLEEP); 543 gl.start = 0; 544 gl.desc = NULL; 545 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor); 546 error = x86_get_ldt1(l, &gl, ldt_buf); 547 /* NB gl.num might have changed */ 548 if (error == 0) { 549 *retval = gl.num * sizeof *ldt; 550 error = copyout(ldt_buf, SCARG(uap, ptr), 551 gl.num * sizeof *ldt_buf); 552 } 553 kmem_free(ldt_buf, sz); 554 555 return error; 556} 557 558struct linux_ldt_info { 559 u_int entry_number; 560 u_long base_addr; 561 u_int limit; 562 u_int seg_32bit:1; 563 u_int contents:2; 564 u_int read_exec_only:1; 565 u_int limit_in_pages:1; 566 u_int seg_not_present:1; 567 u_int useable:1; 568}; 569 570static int 571linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 572 int oldmode) 573{ 574 struct linux_ldt_info ldt_info; 575 union descriptor d; 576 struct x86_set_ldt_args sl; 577 int error; 578 579 DPRINTF(("linux_write_ldt %d\n", oldmode)); 580 if (SCARG(uap, bytecount) != sizeof(ldt_info)) 581 return (EINVAL); 582 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0) 583 return error; 584 if (ldt_info.entry_number >= 8192) 585 return (EINVAL); 586 if (ldt_info.contents == 3) { 587 if (oldmode) 588 return (EINVAL); 589 if (ldt_info.seg_not_present) 590 return (EINVAL); 591 } 592 593 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && 594 (oldmode || (ldt_info.contents == 0 && 595 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 && 596 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 && 597 ldt_info.useable == 0))) { 598 /* this means you should zero the ldt */ 599 (void)memset(&d, 0, sizeof(d)); 600 } else { 601 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff; 602 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff; 603 d.sd.sd_lolimit = ldt_info.limit & 0xffff; 604 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf; 605 d.sd.sd_type = 16 | (ldt_info.contents << 2) | 606 (!ldt_info.read_exec_only << 1); 607 d.sd.sd_dpl = SEL_UPL; 608 d.sd.sd_p = !ldt_info.seg_not_present; 609 d.sd.sd_def32 = ldt_info.seg_32bit; 610 d.sd.sd_gran = ldt_info.limit_in_pages; 611 if (!oldmode) 612 d.sd.sd_xx = ldt_info.useable; 613 else 614 d.sd.sd_xx = 0; 615 } 616 sl.start = ldt_info.entry_number; 617 sl.desc = NULL; 618 sl.num = 1; 619 620 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n", 621 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit)); 622 623 return x86_set_ldt1(l, &sl, &d); 624} 625 626#endif /* USER_LDT */ 627 628int 629linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval) 630{ 631 /* { 632 syscallarg(int) func; 633 syscallarg(void *) ptr; 634 syscallarg(size_t) bytecount; 635 } */ 636 637 switch (SCARG(uap, func)) { 638#ifdef USER_LDT 639 case 0: 640 return linux_read_ldt(l, (const void *)uap, retval); 641 case 1: 642 return linux_write_ldt(l, (const void *)uap, 1); 643 case 2: 644#ifdef notyet 645 return linux_read_default_ldt(l, (const void *)uap, retval); 646#else 647 return (ENOSYS); 648#endif 649 case 0x11: 650 return linux_write_ldt(l, (const void *)uap, 0); 651#endif /* USER_LDT */ 652 653 default: 654 return (ENOSYS); 655 } 656} 657 658/* 659 * XXX Pathetic hack to make svgalib work. This will fake the major 660 * device number of an opened VT so that svgalib likes it. grmbl. 661 * Should probably do it 'wrong the right way' and use a mapping 662 * array for all major device numbers, and map linux_mknod too. 663 */ 664dev_t 665linux_fakedev(dev_t dev, int raw) 666{ 667 extern const struct cdevsw ptc_cdevsw, pts_cdevsw; 668 const struct cdevsw *cd = cdevsw_lookup(dev); 669 670 if (raw) { 671#if (NWSDISPLAY > 0) 672 extern const struct cdevsw wsdisplay_cdevsw; 673 if (cd == &wsdisplay_cdevsw) 674 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); 675#endif 676 } 677 678 if (cd == &ptc_cdevsw) 679 return makedev(LINUX_PTC_MAJOR, minor(dev)); 680 if (cd == &pts_cdevsw) 681 return makedev(LINUX_PTS_MAJOR, minor(dev)); 682 683 return dev; 684} 685 686#if (NWSDISPLAY > 0) 687/* 688 * That's not complete, but enough to get an X server running. 689 */ 690#define NR_KEYS 128 691static const u_short plain_map[NR_KEYS] = { 692 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 693 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009, 694 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 695 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73, 696 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b, 697 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76, 698 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c, 699 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 700 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307, 701 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 702 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a, 703 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 704 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 705 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 706 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 707 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 708}, shift_map[NR_KEYS] = { 709 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e, 710 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009, 711 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49, 712 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53, 713 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a, 714 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56, 715 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c, 716 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e, 717 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307, 718 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 719 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a, 720 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 721 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 722 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116, 723 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 724 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 725}, altgr_map[NR_KEYS] = { 726 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200, 727 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200, 728 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 729 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73, 730 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200, 731 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76, 732 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c, 733 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510, 734 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911, 735 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b, 736 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516, 737 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 738 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 739 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 740 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 741 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 742}, ctrl_map[NR_KEYS] = { 743 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e, 744 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200, 745 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009, 746 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013, 747 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200, 748 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016, 749 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c, 750 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 751 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307, 752 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 753 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a, 754 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 755 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 756 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 757 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 758 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 759}; 760 761const u_short * const linux_keytabs[] = { 762 plain_map, shift_map, altgr_map, altgr_map, ctrl_map 763}; 764#endif 765 766static struct biosdisk_info * 767fd2biosinfo(struct proc *p, struct file *fp) 768{ 769 struct vnode *vp; 770 const char *blkname; 771 char diskname[16]; 772 int i; 773 struct nativedisk_info *nip; 774 struct disklist *dl = x86_alldisks; 775 776 if (fp->f_type != DTYPE_VNODE) 777 return NULL; 778 vp = (struct vnode *)fp->f_data; 779 780 if (vp->v_type != VBLK) 781 return NULL; 782 783 blkname = devsw_blk2name(major(vp->v_rdev)); 784 snprintf(diskname, sizeof diskname, "%s%llu", blkname, 785 (unsigned long long)DISKUNIT(vp->v_rdev)); 786 787 for (i = 0; i < dl->dl_nnativedisks; i++) { 788 nip = &dl->dl_nativedisks[i]; 789 if (strcmp(diskname, nip->ni_devname)) 790 continue; 791 if (nip->ni_nmatches != 0) 792 return &dl->dl_biosdisks[nip->ni_biosmatches[0]]; 793 } 794 795 return NULL; 796} 797 798 799/* 800 * We come here in a last attempt to satisfy a Linux ioctl() call 801 */ 802int 803linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval) 804{ 805 /* { 806 syscallarg(int) fd; 807 syscallarg(u_long) com; 808 syscallarg(void *) data; 809 } */ 810 struct sys_ioctl_args bia; 811 u_long com; 812 int error, error1; 813#if (NWSDISPLAY > 0) 814 struct vt_mode lvt; 815 struct kbentry kbe; 816#endif 817 struct linux_hd_geometry hdg; 818 struct linux_hd_big_geometry hdg_big; 819 struct biosdisk_info *bip; 820 file_t *fp; 821 int fd; 822 struct disklabel label, *labp; 823 struct partinfo partp; 824 int (*ioctlf)(struct file *, u_long, void *); 825 u_long start, biostotal, realtotal; 826 u_char heads, sectors; 827 u_int cylinders; 828 struct ioctl_pt pt; 829 830 fd = SCARG(uap, fd); 831 SCARG(&bia, fd) = fd; 832 SCARG(&bia, data) = SCARG(uap, data); 833 com = SCARG(uap, com); 834 835 if ((fp = fd_getfile(fd)) == NULL) 836 return (EBADF); 837 838 switch (com) { 839#if (NWSDISPLAY > 0) 840 case LINUX_KDGKBMODE: 841 com = KDGKBMODE; 842 break; 843 case LINUX_KDSKBMODE: 844 com = KDSKBMODE; 845 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW) 846 SCARG(&bia, data) = (void *)K_RAW; 847 break; 848 case LINUX_KIOCSOUND: 849 SCARG(&bia, data) = 850 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff); 851 /* fall through */ 852 case LINUX_KDMKTONE: 853 com = KDMKTONE; 854 break; 855 case LINUX_KDSETMODE: 856 com = KDSETMODE; 857 break; 858 case LINUX_KDGETMODE: 859 /* KD_* values are equal to the wscons numbers */ 860 com = WSDISPLAYIO_GMODE; 861 break; 862 case LINUX_KDENABIO: 863 com = KDENABIO; 864 break; 865 case LINUX_KDDISABIO: 866 com = KDDISABIO; 867 break; 868 case LINUX_KDGETLED: 869 com = KDGETLED; 870 break; 871 case LINUX_KDSETLED: 872 com = KDSETLED; 873 break; 874 case LINUX_VT_OPENQRY: 875 com = VT_OPENQRY; 876 break; 877 case LINUX_VT_GETMODE: 878 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt); 879 if (error != 0) 880 goto out; 881 lvt.relsig = native_to_linux_signo[lvt.relsig]; 882 lvt.acqsig = native_to_linux_signo[lvt.acqsig]; 883 lvt.frsig = native_to_linux_signo[lvt.frsig]; 884 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt)); 885 goto out; 886 case LINUX_VT_SETMODE: 887 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt)); 888 if (error != 0) 889 goto out; 890 lvt.relsig = linux_to_native_signo[lvt.relsig]; 891 lvt.acqsig = linux_to_native_signo[lvt.acqsig]; 892 lvt.frsig = linux_to_native_signo[lvt.frsig]; 893 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt); 894 goto out; 895 case LINUX_VT_DISALLOCATE: 896 /* XXX should use WSDISPLAYIO_DELSCREEN */ 897 error = 0; 898 goto out; 899 case LINUX_VT_RELDISP: 900 com = VT_RELDISP; 901 break; 902 case LINUX_VT_ACTIVATE: 903 com = VT_ACTIVATE; 904 break; 905 case LINUX_VT_WAITACTIVE: 906 com = VT_WAITACTIVE; 907 break; 908 case LINUX_VT_GETSTATE: 909 com = VT_GETSTATE; 910 break; 911 case LINUX_KDGKBTYPE: 912 { 913 static const u_int8_t kb101 = KB_101; 914 915 /* This is what Linux does. */ 916 error = copyout(&kb101, SCARG(uap, data), 1); 917 goto out; 918 } 919 case LINUX_KDGKBENT: 920 /* 921 * The Linux KDGKBENT ioctl is different from the 922 * SYSV original. So we handle it in machdep code. 923 * XXX We should use keyboard mapping information 924 * from wsdisplay, but this would be expensive. 925 */ 926 if ((error = copyin(SCARG(uap, data), &kbe, 927 sizeof(struct kbentry)))) 928 goto out; 929 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *) 930 || kbe.kb_index >= NR_KEYS) { 931 error = EINVAL; 932 goto out; 933 } 934 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index]; 935 error = copyout(&kbe, SCARG(uap, data), 936 sizeof(struct kbentry)); 937 goto out; 938#endif 939 case LINUX_HDIO_GETGEO: 940 case LINUX_HDIO_GETGEO_BIG: 941 /* 942 * Try to mimic Linux behaviour: return the BIOS geometry 943 * if possible (extending its # of cylinders if it's beyond 944 * the 1023 limit), fall back to the MI geometry (i.e. 945 * the real geometry) if not found, by returning an 946 * error. See common/linux_hdio.c 947 */ 948 bip = fd2biosinfo(curproc, fp); 949 ioctlf = fp->f_ops->fo_ioctl; 950 error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label); 951 error1 = ioctlf(fp, DIOCGPART, (void *)&partp); 952 if (error != 0 && error1 != 0) { 953 error = error1; 954 goto out; 955 } 956 labp = error != 0 ? &label : partp.disklab; 957 start = error1 != 0 ? partp.part->p_offset : 0; 958 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0 959 && bip->bi_cyl != 0) { 960 heads = bip->bi_head; 961 sectors = bip->bi_sec; 962 cylinders = bip->bi_cyl; 963 biostotal = heads * sectors * cylinders; 964 realtotal = labp->d_ntracks * labp->d_nsectors * 965 labp->d_ncylinders; 966 if (realtotal > biostotal) 967 cylinders = realtotal / (heads * sectors); 968 } else { 969 heads = labp->d_ntracks; 970 cylinders = labp->d_ncylinders; 971 sectors = labp->d_nsectors; 972 } 973 if (com == LINUX_HDIO_GETGEO) { 974 hdg.start = start; 975 hdg.heads = heads; 976 hdg.cylinders = cylinders; 977 hdg.sectors = sectors; 978 error = copyout(&hdg, SCARG(uap, data), sizeof hdg); 979 goto out; 980 } else { 981 hdg_big.start = start; 982 hdg_big.heads = heads; 983 hdg_big.cylinders = cylinders; 984 hdg_big.sectors = sectors; 985 error = copyout(&hdg_big, SCARG(uap, data), 986 sizeof hdg_big); 987 goto out; 988 } 989 990 default: 991 /* 992 * Unknown to us. If it's on a device, just pass it through 993 * using PTIOCLINUX, the device itself might be able to 994 * make some sense of it. 995 * XXX hack: if the function returns EJUSTRETURN, 996 * it has stuffed a sysctl return value in pt.data. 997 */ 998 ioctlf = fp->f_ops->fo_ioctl; 999 pt.com = SCARG(uap, com); 1000 pt.data = SCARG(uap, data); 1001 error = ioctlf(fp, PTIOCLINUX, &pt); 1002 if (error == EJUSTRETURN) { 1003 retval[0] = (register_t)pt.data; 1004 error = 0; 1005 } 1006 1007 if (error == ENOTTY) { 1008 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n", 1009 com)); 1010 } 1011 goto out; 1012 } 1013 SCARG(&bia, com) = com; 1014 error = sys_ioctl(curlwp, &bia, retval); 1015out: 1016 fd_putfile(fd); 1017 return error; 1018} 1019 1020/* 1021 * Set I/O permissions for a process. Just set the maximum level 1022 * right away (ignoring the argument), otherwise we would have 1023 * to rely on I/O permission maps, which are not implemented. 1024 */ 1025int 1026linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval) 1027{ 1028 /* { 1029 syscallarg(int) level; 1030 } */ 1031 struct trapframe *fp = l->l_md.md_regs; 1032 1033 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL, 1034 NULL, NULL, NULL, NULL) != 0) 1035 return EPERM; 1036 fp->tf_eflags |= PSL_IOPL; 1037 *retval = 0; 1038 return 0; 1039} 1040 1041/* 1042 * See above. If a root process tries to set access to an I/O port, 1043 * just let it have the whole range. 1044 */ 1045int 1046linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval) 1047{ 1048 /* { 1049 syscallarg(unsigned int) lo; 1050 syscallarg(unsigned int) hi; 1051 syscallarg(int) val; 1052 } */ 1053 struct trapframe *fp = l->l_md.md_regs; 1054 1055 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ? 1056 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL, 1057 NULL, NULL) != 0) 1058 return EPERM; 1059 if (SCARG(uap, val)) 1060 fp->tf_eflags |= PSL_IOPL; 1061 *retval = 0; 1062 return 0; 1063} 1064 1065int 1066linux_usertrap(struct lwp *l, vaddr_t trapaddr, 1067 void *arg) 1068{ 1069 return 0; 1070} 1071 1072const char * 1073linux_get_uname_arch(void) 1074{ 1075 static char uname_arch[5] = "i386"; 1076 1077 if (uname_arch[1] == '3') 1078 uname_arch[1] += cpu_class; 1079 return uname_arch; 1080} 1081