machdep.c revision 195907
1/*- 2 * Copyright (c) 1992 Terrence R. Lambert. 3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 38 */ 39 40#include <sys/cdefs.h> 41__FBSDID("$FreeBSD: head/sys/i386/i386/machdep.c 195907 2009-07-27 13:51:55Z rpaulo $"); 42 43#include "opt_apic.h" 44#include "opt_atalk.h" 45#include "opt_compat.h" 46#include "opt_cpu.h" 47#include "opt_ddb.h" 48#include "opt_inet.h" 49#include "opt_ipx.h" 50#include "opt_isa.h" 51#include "opt_kstack_pages.h" 52#include "opt_maxmem.h" 53#include "opt_msgbuf.h" 54#include "opt_npx.h" 55#include "opt_perfmon.h" 56#include "opt_xbox.h" 57 58#include <sys/param.h> 59#include <sys/proc.h> 60#include <sys/systm.h> 61#include <sys/bio.h> 62#include <sys/buf.h> 63#include <sys/bus.h> 64#include <sys/callout.h> 65#include <sys/cons.h> 66#include <sys/cpu.h> 67#include <sys/eventhandler.h> 68#include <sys/exec.h> 69#include <sys/imgact.h> 70#include <sys/kdb.h> 71#include <sys/kernel.h> 72#include <sys/ktr.h> 73#include <sys/linker.h> 74#include <sys/lock.h> 75#include <sys/malloc.h> 76#include <sys/memrange.h> 77#include <sys/msgbuf.h> 78#include <sys/mutex.h> 79#include <sys/pcpu.h> 80#include <sys/ptrace.h> 81#include <sys/reboot.h> 82#include <sys/sched.h> 83#include <sys/signalvar.h> 84#include <sys/sysctl.h> 85#include <sys/sysent.h> 86#include <sys/sysproto.h> 87#include <sys/ucontext.h> 88#include <sys/vmmeter.h> 89 90#include <vm/vm.h> 91#include <vm/vm_extern.h> 92#include <vm/vm_kern.h> 93#include <vm/vm_page.h> 94#include <vm/vm_map.h> 
95#include <vm/vm_object.h> 96#include <vm/vm_pager.h> 97#include <vm/vm_param.h> 98 99#ifdef DDB 100#ifndef KDB 101#error KDB must be enabled in order for DDB to work! 102#endif 103#include <ddb/ddb.h> 104#include <ddb/db_sym.h> 105#endif 106 107#include <isa/rtc.h> 108 109#include <net/netisr.h> 110 111#include <machine/bootinfo.h> 112#include <machine/clock.h> 113#include <machine/cpu.h> 114#include <machine/cputypes.h> 115#include <machine/intr_machdep.h> 116#include <machine/mca.h> 117#include <machine/md_var.h> 118#include <machine/metadata.h> 119#include <machine/pc/bios.h> 120#include <machine/pcb.h> 121#include <machine/pcb_ext.h> 122#include <machine/proc.h> 123#include <machine/reg.h> 124#include <machine/sigframe.h> 125#include <machine/specialreg.h> 126#include <machine/vm86.h> 127#ifdef PERFMON 128#include <machine/perfmon.h> 129#endif 130#ifdef SMP 131#include <machine/smp.h> 132#endif 133 134#ifdef DEV_ISA 135#include <i386/isa/icu.h> 136#endif 137 138#ifdef XBOX 139#include <machine/xbox.h> 140 141int arch_i386_is_xbox = 0; 142uint32_t arch_i386_xbox_memsize = 0; 143#endif 144 145#ifdef XEN 146/* XEN includes */ 147#include <machine/xen/xen-os.h> 148#include <xen/hypervisor.h> 149#include <machine/xen/xen-os.h> 150#include <machine/xen/xenvar.h> 151#include <machine/xen/xenfunc.h> 152#include <xen/xen_intr.h> 153 154void Xhypervisor_callback(void); 155void failsafe_callback(void); 156 157extern trap_info_t trap_table[]; 158struct proc_ldt default_proc_ldt; 159extern int init_first; 160int running_xen = 1; 161extern unsigned long physfree; 162#endif /* XEN */ 163 164/* Sanity check for __curthread() */ 165CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 166 167extern void init386(int first); 168extern void dblfault_handler(void); 169 170extern void printcpuinfo(void); /* XXX header file */ 171extern void finishidentcpu(void); 172extern void panicifcpuunsupported(void); 173extern void initializecpu(void); 174 175#define CS_SECURE(cs) (ISPL(cs) == 
SEL_UPL) 176#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 177 178#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) 179#define CPU_ENABLE_SSE 180#endif 181 182static void cpu_startup(void *); 183static void fpstate_drop(struct thread *td); 184static void get_fpcontext(struct thread *td, mcontext_t *mcp); 185static int set_fpcontext(struct thread *td, const mcontext_t *mcp); 186#ifdef CPU_ENABLE_SSE 187static void set_fpregs_xmm(struct save87 *, struct savexmm *); 188static void fill_fpregs_xmm(struct savexmm *, struct save87 *); 189#endif /* CPU_ENABLE_SSE */ 190SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 191 192#ifdef DDB 193extern vm_offset_t ksym_start, ksym_end; 194#endif 195 196/* Intel ICH registers */ 197#define ICH_PMBASE 0x400 198#define ICH_SMI_EN ICH_PMBASE + 0x30 199 200int _udatasel, _ucodesel; 201u_int basemem; 202 203int cold = 1; 204 205#ifdef COMPAT_43 206static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); 207#endif 208#ifdef COMPAT_FREEBSD4 209static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); 210#endif 211 212long Maxmem = 0; 213long realmem = 0; 214 215#ifdef PAE 216FEATURE(pae, "Physical Address Extensions"); 217#endif 218 219/* 220 * The number of PHYSMAP entries must be one less than the number of 221 * PHYSSEG entries because the PHYSMAP entry that spans the largest 222 * physical address that is accessible by ISA DMA is split into two 223 * PHYSSEG entries. 
 */
#define PHYSMAP_SIZE	(2 * (VM_PHYSSEG_MAX - 1))

/*
 * Usable physical memory, as pairs of (start, end) addresses; a pair of
 * zeroes terminates the list (see PHYS_AVAIL_ARRAY_END below).
 */
vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)

struct kva_md_info kmi;

/* Trapframe used while hand-crafting the initial context of proc0. */
static struct trapframe proc0_tf;
struct pcpu __pcpu[MAXCPU];

/* Protects the legacy PIC/ICU; also used by the APIC code. */
struct mtx icu_lock;

struct mem_range_softc mem_range_softc;

/*
 * cpu_startup() - machine-dependent boot-time initialization, run via
 * SYSINIT at SI_SUB_CPU.  Works around an SMI problem on early Intel
 * Macs, starts the RTC, reports CPU and memory, initializes the kernel
 * VM submaps and buffer cache, and sets up machine-check handling.
 */
static void
cpu_startup(dummy)
	void *dummy;
{
	uintmax_t memsize;
	char *sysenv;

	/*
	 * On MacBooks, we need to disallow the legacy USB circuit to
	 * generate an SMI# because this can cause several problems,
	 * namely: incorrect CPU frequency detection and failure to
	 * start the APs.
	 * We do this by disabling a bit in the SMI_EN (SMI Control and
	 * Enable register) of the Intel ICH LPC Interface Bridge.
	 */
	sysenv = getenv("smbios.system.product");
	if (sysenv != NULL) {
		if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
		    strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
		    strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
		    strncmp(sysenv, "Macmini1,1", 10) == 0) {
			if (bootverbose)
				printf("Disabling LEGACY_USB_EN bit on "
				    "Intel ICH.\n");
			/* Clear bit 3 (LEGACY_USB_EN) of SMI_EN. */
			outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
		}
		freeenv(sysenv);
	}

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	startrtclock();
	printcpuinfo();
	panicifcpuunsupported();
#ifdef PERFMON
	perfmon_init();
#endif
	/*
	 * Prefer the SMBIOS-reported memory size (in kB) when the loader
	 * provided it; otherwise fall back to the Maxmem page count.
	 */
	sysenv = getenv("smbios.memory.enabled");
	if (sysenv != NULL) {
		memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10);
		freeenv(sysenv);
	} else
		memsize = 0;
	if (memsize > 0)
		printf("real memory = %ju (%ju MB)\n", memsize << 10,
		    memsize >> 10);
	else
		printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem),
		    ptoa((uintmax_t)Maxmem) / 1048576);
	realmem = Maxmem;
	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (bootverbose) {
		int indx;

		printf("Physical memory chunk(s):\n");
		/* phys_avail[] is (start, end) pairs, 0/0-terminated. */
		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
			vm_paddr_t size;

			size = phys_avail[indx + 1] - phys_avail[indx];
			printf(
			    "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[indx],
			    (uintmax_t)phys_avail[indx + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	vm_ksubmap_init(&kmi);

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)cnt.v_free_count),
	    ptoa((uintmax_t)cnt.v_free_count) / 1048576);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();
#ifndef XEN
	cpu_setregs();
#endif
	mca_init();
}

/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
339 */ 340#ifdef COMPAT_43 341static void 342osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 343{ 344 struct osigframe sf, *fp; 345 struct proc *p; 346 struct thread *td; 347 struct sigacts *psp; 348 struct trapframe *regs; 349 int sig; 350 int oonstack; 351 352 td = curthread; 353 p = td->td_proc; 354 PROC_LOCK_ASSERT(p, MA_OWNED); 355 sig = ksi->ksi_signo; 356 psp = p->p_sigacts; 357 mtx_assert(&psp->ps_mtx, MA_OWNED); 358 regs = td->td_frame; 359 oonstack = sigonstack(regs->tf_esp); 360 361 /* Allocate space for the signal handler context. */ 362 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 363 SIGISMEMBER(psp->ps_sigonstack, sig)) { 364 fp = (struct osigframe *)(td->td_sigstk.ss_sp + 365 td->td_sigstk.ss_size - sizeof(struct osigframe)); 366#if defined(COMPAT_43) 367 td->td_sigstk.ss_flags |= SS_ONSTACK; 368#endif 369 } else 370 fp = (struct osigframe *)regs->tf_esp - 1; 371 372 /* Translate the signal if appropriate. */ 373 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) 374 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 375 376 /* Build the argument list for the signal handler. */ 377 sf.sf_signum = sig; 378 sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; 379 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 380 /* Signal handler installed with SA_SIGINFO. */ 381 sf.sf_arg2 = (register_t)&fp->sf_siginfo; 382 sf.sf_siginfo.si_signo = sig; 383 sf.sf_siginfo.si_code = ksi->ksi_code; 384 sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; 385 } else { 386 /* Old FreeBSD-style arguments. */ 387 sf.sf_arg2 = ksi->ksi_code; 388 sf.sf_addr = (register_t)ksi->ksi_addr; 389 sf.sf_ahu.sf_handler = catcher; 390 } 391 mtx_unlock(&psp->ps_mtx); 392 PROC_UNLOCK(p); 393 394 /* Save most if not all of trap frame. 
*/ 395 sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; 396 sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; 397 sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; 398 sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; 399 sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; 400 sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; 401 sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; 402 sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; 403 sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; 404 sf.sf_siginfo.si_sc.sc_es = regs->tf_es; 405 sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; 406 sf.sf_siginfo.si_sc.sc_gs = rgs(); 407 sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; 408 409 /* Build the signal context to be used by osigreturn(). */ 410 sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; 411 SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); 412 sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; 413 sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; 414 sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; 415 sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; 416 sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; 417 sf.sf_siginfo.si_sc.sc_err = regs->tf_err; 418 419 /* 420 * If we're a vm86 process, we want to save the segment registers. 421 * We also change eflags to be our emulated eflags, not the actual 422 * eflags. 423 */ 424 if (regs->tf_eflags & PSL_VM) { 425 /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ 426 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 427 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; 428 429 sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; 430 sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; 431 sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; 432 sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; 433 434 if (vm86->vm86_has_vme == 0) 435 sf.sf_siginfo.si_sc.sc_ps = 436 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | 437 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); 438 439 /* See sendsig() for comments. */ 440 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); 441 } 442 443 /* 444 * Copy the sigframe out to the user's stack. 
445 */ 446 if (copyout(&sf, fp, sizeof(*fp)) != 0) { 447#ifdef DEBUG 448 printf("process %ld has trashed its stack\n", (long)p->p_pid); 449#endif 450 PROC_LOCK(p); 451 sigexit(td, SIGILL); 452 } 453 454 regs->tf_esp = (int)fp; 455 regs->tf_eip = PS_STRINGS - szosigcode; 456 regs->tf_eflags &= ~(PSL_T | PSL_D); 457 regs->tf_cs = _ucodesel; 458 regs->tf_ds = _udatasel; 459 regs->tf_es = _udatasel; 460 regs->tf_fs = _udatasel; 461 load_gs(_udatasel); 462 regs->tf_ss = _udatasel; 463 PROC_LOCK(p); 464 mtx_lock(&psp->ps_mtx); 465} 466#endif /* COMPAT_43 */ 467 468#ifdef COMPAT_FREEBSD4 469static void 470freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 471{ 472 struct sigframe4 sf, *sfp; 473 struct proc *p; 474 struct thread *td; 475 struct sigacts *psp; 476 struct trapframe *regs; 477 int sig; 478 int oonstack; 479 480 td = curthread; 481 p = td->td_proc; 482 PROC_LOCK_ASSERT(p, MA_OWNED); 483 sig = ksi->ksi_signo; 484 psp = p->p_sigacts; 485 mtx_assert(&psp->ps_mtx, MA_OWNED); 486 regs = td->td_frame; 487 oonstack = sigonstack(regs->tf_esp); 488 489 /* Save user context. */ 490 bzero(&sf, sizeof(sf)); 491 sf.sf_uc.uc_sigmask = *mask; 492 sf.sf_uc.uc_stack = td->td_sigstk; 493 sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 494 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; 495 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; 496 sf.sf_uc.uc_mcontext.mc_gs = rgs(); 497 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); 498 499 /* Allocate space for the signal handler context. */ 500 if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && 501 SIGISMEMBER(psp->ps_sigonstack, sig)) { 502 sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + 503 td->td_sigstk.ss_size - sizeof(struct sigframe4)); 504#if defined(COMPAT_43) 505 td->td_sigstk.ss_flags |= SS_ONSTACK; 506#endif 507 } else 508 sfp = (struct sigframe4 *)regs->tf_esp - 1; 509 510 /* Translate the signal if appropriate. 
*/ 511 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) 512 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 513 514 /* Build the argument list for the signal handler. */ 515 sf.sf_signum = sig; 516 sf.sf_ucontext = (register_t)&sfp->sf_uc; 517 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 518 /* Signal handler installed with SA_SIGINFO. */ 519 sf.sf_siginfo = (register_t)&sfp->sf_si; 520 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; 521 522 /* Fill in POSIX parts */ 523 sf.sf_si.si_signo = sig; 524 sf.sf_si.si_code = ksi->ksi_code; 525 sf.sf_si.si_addr = ksi->ksi_addr; 526 } else { 527 /* Old FreeBSD-style arguments. */ 528 sf.sf_siginfo = ksi->ksi_code; 529 sf.sf_addr = (register_t)ksi->ksi_addr; 530 sf.sf_ahu.sf_handler = catcher; 531 } 532 mtx_unlock(&psp->ps_mtx); 533 PROC_UNLOCK(p); 534 535 /* 536 * If we're a vm86 process, we want to save the segment registers. 537 * We also change eflags to be our emulated eflags, not the actual 538 * eflags. 539 */ 540 if (regs->tf_eflags & PSL_VM) { 541 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 542 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; 543 544 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; 545 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; 546 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; 547 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; 548 549 if (vm86->vm86_has_vme == 0) 550 sf.sf_uc.uc_mcontext.mc_eflags = 551 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | 552 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); 553 554 /* 555 * Clear PSL_NT to inhibit T_TSSFLT faults on return from 556 * syscalls made by the signal handler. This just avoids 557 * wasting time for our lazy fixup of such faults. PSL_NT 558 * does nothing in vm86 mode, but vm86 programs can set it 559 * almost legitimately in probes for old cpu types. 560 */ 561 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); 562 } 563 564 /* 565 * Copy the sigframe out to the user's stack. 
566 */ 567 if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { 568#ifdef DEBUG 569 printf("process %ld has trashed its stack\n", (long)p->p_pid); 570#endif 571 PROC_LOCK(p); 572 sigexit(td, SIGILL); 573 } 574 575 regs->tf_esp = (int)sfp; 576 regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; 577 regs->tf_eflags &= ~(PSL_T | PSL_D); 578 regs->tf_cs = _ucodesel; 579 regs->tf_ds = _udatasel; 580 regs->tf_es = _udatasel; 581 regs->tf_fs = _udatasel; 582 regs->tf_ss = _udatasel; 583 PROC_LOCK(p); 584 mtx_lock(&psp->ps_mtx); 585} 586#endif /* COMPAT_FREEBSD4 */ 587 588void 589sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 590{ 591 struct sigframe sf, *sfp; 592 struct proc *p; 593 struct thread *td; 594 struct sigacts *psp; 595 char *sp; 596 struct trapframe *regs; 597 struct segment_descriptor *sdp; 598 int sig; 599 int oonstack; 600 601 td = curthread; 602 p = td->td_proc; 603 PROC_LOCK_ASSERT(p, MA_OWNED); 604 sig = ksi->ksi_signo; 605 psp = p->p_sigacts; 606 mtx_assert(&psp->ps_mtx, MA_OWNED); 607#ifdef COMPAT_FREEBSD4 608 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { 609 freebsd4_sendsig(catcher, ksi, mask); 610 return; 611 } 612#endif 613#ifdef COMPAT_43 614 if (SIGISMEMBER(psp->ps_osigset, sig)) { 615 osendsig(catcher, ksi, mask); 616 return; 617 } 618#endif 619 regs = td->td_frame; 620 oonstack = sigonstack(regs->tf_esp); 621 622 /* Save user context. */ 623 bzero(&sf, sizeof(sf)); 624 sf.sf_uc.uc_sigmask = *mask; 625 sf.sf_uc.uc_stack = td->td_sigstk; 626 sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 627 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; 628 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; 629 sf.sf_uc.uc_mcontext.mc_gs = rgs(); 630 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); 631 sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ 632 get_fpcontext(td, &sf.sf_uc.uc_mcontext); 633 fpstate_drop(td); 634 /* 635 * Unconditionally fill the fsbase and gsbase into the mcontext. 
636 */ 637 sdp = &td->td_pcb->pcb_gsd; 638 sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | 639 sdp->sd_lobase; 640 sdp = &td->td_pcb->pcb_fsd; 641 sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | 642 sdp->sd_lobase; 643 644 /* Allocate space for the signal handler context. */ 645 if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && 646 SIGISMEMBER(psp->ps_sigonstack, sig)) { 647 sp = td->td_sigstk.ss_sp + 648 td->td_sigstk.ss_size - sizeof(struct sigframe); 649#if defined(COMPAT_43) 650 td->td_sigstk.ss_flags |= SS_ONSTACK; 651#endif 652 } else 653 sp = (char *)regs->tf_esp - sizeof(struct sigframe); 654 /* Align to 16 bytes. */ 655 sfp = (struct sigframe *)((unsigned int)sp & ~0xF); 656 657 /* Translate the signal if appropriate. */ 658 if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) 659 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 660 661 /* Build the argument list for the signal handler. */ 662 sf.sf_signum = sig; 663 sf.sf_ucontext = (register_t)&sfp->sf_uc; 664 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 665 /* Signal handler installed with SA_SIGINFO. */ 666 sf.sf_siginfo = (register_t)&sfp->sf_si; 667 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; 668 669 /* Fill in POSIX parts */ 670 sf.sf_si = ksi->ksi_info; 671 sf.sf_si.si_signo = sig; /* maybe a translated signal */ 672 } else { 673 /* Old FreeBSD-style arguments. */ 674 sf.sf_siginfo = ksi->ksi_code; 675 sf.sf_addr = (register_t)ksi->ksi_addr; 676 sf.sf_ahu.sf_handler = catcher; 677 } 678 mtx_unlock(&psp->ps_mtx); 679 PROC_UNLOCK(p); 680 681 /* 682 * If we're a vm86 process, we want to save the segment registers. 683 * We also change eflags to be our emulated eflags, not the actual 684 * eflags. 
685 */ 686 if (regs->tf_eflags & PSL_VM) { 687 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 688 struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; 689 690 sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; 691 sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; 692 sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; 693 sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; 694 695 if (vm86->vm86_has_vme == 0) 696 sf.sf_uc.uc_mcontext.mc_eflags = 697 (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | 698 (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); 699 700 /* 701 * Clear PSL_NT to inhibit T_TSSFLT faults on return from 702 * syscalls made by the signal handler. This just avoids 703 * wasting time for our lazy fixup of such faults. PSL_NT 704 * does nothing in vm86 mode, but vm86 programs can set it 705 * almost legitimately in probes for old cpu types. 706 */ 707 tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); 708 } 709 710 /* 711 * Copy the sigframe out to the user's stack. 712 */ 713 if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { 714#ifdef DEBUG 715 printf("process %ld has trashed its stack\n", (long)p->p_pid); 716#endif 717 PROC_LOCK(p); 718 sigexit(td, SIGILL); 719 } 720 721 regs->tf_esp = (int)sfp; 722 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 723 regs->tf_eflags &= ~(PSL_T | PSL_D); 724 regs->tf_cs = _ucodesel; 725 regs->tf_ds = _udatasel; 726 regs->tf_es = _udatasel; 727 regs->tf_fs = _udatasel; 728 regs->tf_ss = _udatasel; 729 PROC_LOCK(p); 730 mtx_lock(&psp->ps_mtx); 731} 732 733/* 734 * System call to cleanup state after a signal 735 * has been taken. Reset signal mask and 736 * stack state from context left by sendsig (above). 737 * Return to previous pc and psl as specified by 738 * context left by sendsig. Check carefully to 739 * make sure that the user has not modified the 740 * state to gain improper privileges. 
 *
 * MPSAFE
 */
#ifdef COMPAT_43
/*
 * osigreturn() - old (4.3BSD-compatible) sigreturn.  Restores the
 * register state and signal mask saved by osendsig(), after validating
 * eflags and %cs so the caller cannot gain privilege.
 */
int
osigreturn(td, uap)
	struct thread *td;
	struct osigreturn_args /* {
		struct osigcontext *sigcntxp;
	} */ *uap;
{
	struct osigcontext sc;
	struct trapframe *regs;
	struct osigcontext *scp;
	struct proc *p = td->td_proc;
	int eflags, error;
	ksiginfo_t ksi;

	regs = td->td_frame;
	/* Copy the user-supplied context in before trusting any of it. */
	error = copyin(uap->sigcntxp, &sc, sizeof(sc));
	if (error != 0)
		return (error);
	scp = &sc;
	eflags = scp->sc_ps;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
		}

		/* Only let the user change the VME/VM-safe eflags bits. */
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		tf->tf_vm86_ds = scp->sc_ds;
		tf->tf_vm86_es = scp->sc_es;
		tf->tf_vm86_fs = scp->sc_fs;
		tf->tf_vm86_gs = scp->sc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		if (!CS_SECURE(scp->sc_cs)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_trapno = T_PROTFLT;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
			return (EINVAL);
		}
		regs->tf_ds = scp->sc_ds;
		regs->tf_es = scp->sc_es;
		regs->tf_fs = scp->sc_fs;
	}

	/* Restore remaining registers. */
	regs->tf_eax = scp->sc_eax;
	regs->tf_ebx = scp->sc_ebx;
	regs->tf_ecx = scp->sc_ecx;
	regs->tf_edx = scp->sc_edx;
	regs->tf_esi = scp->sc_esi;
	regs->tf_edi = scp->sc_edi;
	regs->tf_cs = scp->sc_cs;
	regs->tf_ss = scp->sc_ss;
	regs->tf_isp = scp->sc_isp;
	regs->tf_ebp = scp->sc_fp;
	regs->tf_esp = scp->sc_sp;
	regs->tf_eip = scp->sc_pc;
	regs->tf_eflags = eflags;

	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (scp->sc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
	/* Restore the old-style signal mask, minus unblockable signals. */
	SIGSETOLD(td->td_sigmask, scp->sc_mask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
#endif /* COMPAT_43 */

#ifdef COMPAT_FREEBSD4
/*
 * MPSAFE
 *
 * freebsd4_sigreturn() - FreeBSD 4.x sigreturn; restores the context
 * saved by freebsd4_sendsig().  Same validation logic as sigreturn()
 * below, but uses the ucontext4 layout and does not restore FPU state.
 */
int
freebsd4_sigreturn(td, uap)
	struct thread *td;
	struct freebsd4_sigreturn_args /* {
		const ucontext4 *sigcntxp;
	} */ *uap;
{
	struct ucontext4 uc;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	const struct ucontext4 *ucp;
	int cs, eflags, error;
	ksiginfo_t ksi;

	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
		}
		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		/* mc_fs is laid out like a trapframe; bulk-copy it in. */
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			printf("freebsd4_sigreturn: cs = 0x%x\n", cs);
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_trapno = T_PROTFLT;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
			return (EINVAL);
		}

		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	td->td_sigmask = ucp->uc_sigmask;
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}
#endif	/* COMPAT_FREEBSD4 */

/*
 * MPSAFE
 *
 * sigreturn() - native sigreturn; restores the full context saved by
 * sendsig(), including FPU state via set_fpcontext(), after validating
 * eflags and %cs.
 */
int
sigreturn(td, uap)
	struct thread *td;
	struct sigreturn_args /* {
		const struct __ucontext *sigcntxp;
	} */ *uap;
{
	ucontext_t uc;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	const ucontext_t *ucp;
	int cs, eflags, error, ret;
	ksiginfo_t ksi;

	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0)
		return (error);
	ucp = &uc;
	regs = td->td_frame;
	eflags = ucp->uc_mcontext.mc_eflags;
	if (eflags & PSL_VM) {
		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
		struct vm86_kernel *vm86;

		/*
		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
		 * set up the vm86 area, and we can't enter vm86 mode.
		 */
		if (td->td_pcb->pcb_ext == 0)
			return (EINVAL);
		vm86 = &td->td_pcb->pcb_ext->ext_vm86;
		if (vm86->vm86_inited == 0)
			return (EINVAL);

		/* Go back to user mode if both flags are set. */
		if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
		}

		if (vm86->vm86_has_vme) {
			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
			    (eflags & VME_USERCHANGE) | PSL_VM;
		} else {
			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
			    (eflags & VM_USERCHANGE) | PSL_VM;
		}
		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
		tf->tf_eflags = eflags;
		tf->tf_vm86_ds = tf->tf_ds;
		tf->tf_vm86_es = tf->tf_es;
		tf->tf_vm86_fs = tf->tf_fs;
		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
		tf->tf_ds = _udatasel;
		tf->tf_es = _udatasel;
		tf->tf_fs = _udatasel;
	} else {
		/*
		 * Don't allow users to change privileged or reserved flags.
		 */
		/*
		 * XXX do allow users to change the privileged flag PSL_RF.
		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
		 * should sometimes set it there too.  tf_eflags is kept in
		 * the signal context during signal handling and there is no
		 * other place to remember it, so the PSL_RF bit may be
		 * corrupted by the signal handler without us knowing.
		 * Corruption of the PSL_RF bit at worst causes one more or
		 * one less debugger trap, so allowing it is fairly harmless.
		 */
		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
			printf("sigreturn: eflags = 0x%x\n", eflags);
			return (EINVAL);
		}

		/*
		 * Don't allow users to load a valid privileged %cs.  Let the
		 * hardware check for invalid selectors, excess privilege in
		 * other selectors, invalid %eip's and invalid %esp's.
		 */
		cs = ucp->uc_mcontext.mc_cs;
		if (!CS_SECURE(cs)) {
			printf("sigreturn: cs = 0x%x\n", cs);
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			ksi.ksi_trapno = T_PROTFLT;
			ksi.ksi_addr = (void *)regs->tf_eip;
			trapsignal(td, &ksi);
			return (EINVAL);
		}

		/* Restore FPU state before committing the register set. */
		ret = set_fpcontext(td, &ucp->uc_mcontext);
		if (ret != 0)
			return (ret);
		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
	}

	PROC_LOCK(p);
#if defined(COMPAT_43)
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	td->td_sigmask = ucp->uc_sigmask;
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);
	return (EJUSTRETURN);
}

/*
 * Machine dependent boot() routine
 *
 * I haven't seen anything to put here yet
 * Possibly some stuff might be grafted back here from boot()
 */
void
cpu_boot(int howto)
{
}

/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{
	/* Not applicable */
}

/*
 * Get current clock frequency for the given cpu id.
 * Returns EINVAL for a bad cpu id / NULL rate pointer and EOPNOTSUPP
 * when the CPU has no TSC; otherwise estimates the rate by timing the
 * TSC across a 1 ms DELAY().
 */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	register_t reg;
	uint64_t tsc1, tsc2;

	if (pcpu_find(cpu_id) == NULL || rate == NULL)
		return (EINVAL);
	if (!tsc_present)
		return (EOPNOTSUPP);

	/* If we're booting, trust the rate calibrated moments ago. */
	if (cold) {
		*rate = tsc_freq;
		return (0);
	}

#ifdef SMP
	/* Schedule ourselves on the indicated cpu. */
	thread_lock(curthread);
	sched_bind(curthread, cpu_id);
	thread_unlock(curthread);
#endif

	/* Calibrate by measuring a short delay.
	 */
	reg = intr_disable();
	tsc1 = rdtsc();
	DELAY(1000);
	tsc2 = rdtsc();
	intr_restore(reg);

#ifdef SMP
	/* Release the CPU binding taken above. */
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
#endif

	/*
	 * Calculate the difference in readings, convert to Mhz, and
	 * subtract 0.5% of the total.  Empirical testing has shown that
	 * overhead in DELAY() works out to approximately this value.
	 */
	tsc2 -= tsc1;
	*rate = tsc2 * 1000 - tsc2 * 5;
	return (0);
}


void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */

#ifdef XEN

/* Under Xen, "halting" the CPU means asking the hypervisor to power off. */
void
cpu_halt(void)
{
	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
}

int scheduler_running;

/* Xen idle: block in the hypervisor until an event arrives. */
static void
cpu_idle_hlt(int busy)
{

	scheduler_running = 1;
	enable_intr();
	idle_block();
}

#else
/*
 * Shutdown the CPU as much as possible
 */
void
cpu_halt(void)
{
	for (;;)
		__asm__ ("hlt");
}

/*
 * Idle using the hlt instruction; a pending interrupt wakes the CPU.
 */
static void
cpu_idle_hlt(int busy)
{
	/*
	 * we must absolutely guarantee that hlt is the next instruction
	 * after sti or we introduce a timing window.  (sti only takes
	 * effect after the following instruction, so "sti; hlt" cannot
	 * miss an interrupt delivered in between.)
	 */
	disable_intr();
	if (sched_runnable())
		enable_intr();
	else
		__asm __volatile("sti; hlt");
}
#endif

/*
 * Idle via the ACPI-provided hook when one is registered, falling
 * back to plain "sti; hlt" otherwise.  Same interrupt-window
 * discipline as cpu_idle_hlt().
 */
static void
cpu_idle_acpi(int busy)
{
	disable_intr();
	if (sched_runnable())
		enable_intr();
	else if (cpu_idle_hook)
		cpu_idle_hook();
	else
		__asm __volatile("sti; hlt");
}

/* Non-zero once cpu_probe_amdc1e() has detected C1E-capable hardware. */
static int  cpu_ident_amdc1e = 0;

/*
 * Detect AMD K8 processors that may enter C1E, which kills the local
 * APIC timer.  Returns 1 (and latches cpu_ident_amdc1e) when the
 * workaround idle routine is applicable, 0 otherwise.
 */
static int
cpu_probe_amdc1e(void)
{
#ifdef DEV_APIC
	int i;

	/*
	 * Forget it, if we're not using local APIC timer.
	 */
	if (resource_disabled("apic", 0) ||
	    (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
		return (0);

	/*
	 * Detect the presence of C1E capability mostly on latest
	 * dual-cores (or future) k8 family.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    (cpu_id & 0x00000f00) == 0x00000f00 &&
	    (cpu_id & 0x0fff0000) >= 0x00040000) {
		cpu_ident_amdc1e = 1;
		return (1);
	}
#endif
	return (0);
}

/*
 * C1E renders the local APIC timer dead, so we disable it by
 * reading the Interrupt Pending Message register and clearing
 * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
 *
 * Reference:
 *   "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
 *   #32559 revision 3.00+
 */
#define	MSR_AMDK8_IPM		0xc0010055
#define	AMDK8_SMIONCMPHALT	(1ULL << 27)
#define	AMDK8_C1EONCMPHALT	(1ULL << 28)
#define	AMDK8_CMPHALT		(AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)

/*
 * Idle routine for C1E-afflicted AMD parts: clear the CmpHalt bits in
 * the IPM MSR before halting so the local APIC timer keeps running.
 */
static void
cpu_idle_amdc1e(int busy)
{

	disable_intr();
	if (sched_runnable())
		enable_intr();
	else {
		uint64_t msr;

		msr = rdmsr(MSR_AMDK8_IPM);
		if (msr & AMDK8_CMPHALT)
			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);

		if (cpu_idle_hook)
			cpu_idle_hook();
		else
			__asm __volatile("sti; hlt");
	}
}

/* Busy-wait idle: burn cycles; lowest wakeup latency, highest power. */
static void
cpu_idle_spin(int busy)
{
	return;
}

/* Currently selected idle routine; settable via machdep.idle sysctl. */
#ifdef XEN
void (*cpu_idle_fn)(int) = cpu_idle_hlt;
#else
void (*cpu_idle_fn)(int) = cpu_idle_acpi;
#endif

/*
 * Main idle entry point, called from the scheduler's idle thread.
 * Dispatches to the selected low-power routine.
 */
void
cpu_idle(int busy)
{
#if defined(SMP) && !defined(XEN)
	if (mp_grab_cpu_hlt())
		return;
#endif
	cpu_idle_fn(busy);
}

/*
 * mwait cpu power states.  Lower 4 bits are sub-states.
1325 */ 1326#define MWAIT_C0 0xf0 1327#define MWAIT_C1 0x00 1328#define MWAIT_C2 0x10 1329#define MWAIT_C3 0x20 1330#define MWAIT_C4 0x30 1331 1332#define MWAIT_DISABLED 0x0 1333#define MWAIT_WOKEN 0x1 1334#define MWAIT_WAITING 0x2 1335 1336static void 1337cpu_idle_mwait(int busy) 1338{ 1339 int *mwait; 1340 1341 mwait = (int *)PCPU_PTR(monitorbuf); 1342 *mwait = MWAIT_WAITING; 1343 if (sched_runnable()) 1344 return; 1345 cpu_monitor(mwait, 0, 0); 1346 if (*mwait == MWAIT_WAITING) 1347 cpu_mwait(0, MWAIT_C1); 1348} 1349 1350static void 1351cpu_idle_mwait_hlt(int busy) 1352{ 1353 int *mwait; 1354 1355 mwait = (int *)PCPU_PTR(monitorbuf); 1356 if (busy == 0) { 1357 *mwait = MWAIT_DISABLED; 1358 cpu_idle_hlt(busy); 1359 return; 1360 } 1361 *mwait = MWAIT_WAITING; 1362 if (sched_runnable()) 1363 return; 1364 cpu_monitor(mwait, 0, 0); 1365 if (*mwait == MWAIT_WAITING) 1366 cpu_mwait(0, MWAIT_C1); 1367} 1368 1369int 1370cpu_idle_wakeup(int cpu) 1371{ 1372 struct pcpu *pcpu; 1373 int *mwait; 1374 1375 if (cpu_idle_fn == cpu_idle_spin) 1376 return (1); 1377 if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt) 1378 return (0); 1379 pcpu = pcpu_find(cpu); 1380 mwait = (int *)pcpu->pc_monitorbuf; 1381 /* 1382 * This doesn't need to be atomic since missing the race will 1383 * simply result in unnecessary IPIs. 1384 */ 1385 if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED) 1386 return (0); 1387 *mwait = MWAIT_WOKEN; 1388 1389 return (1); 1390} 1391 1392/* 1393 * Ordered by speed/power consumption. 
1394 */ 1395struct { 1396 void *id_fn; 1397 char *id_name; 1398} idle_tbl[] = { 1399 { cpu_idle_spin, "spin" }, 1400 { cpu_idle_mwait, "mwait" }, 1401 { cpu_idle_mwait_hlt, "mwait_hlt" }, 1402 { cpu_idle_amdc1e, "amdc1e" }, 1403 { cpu_idle_hlt, "hlt" }, 1404 { cpu_idle_acpi, "acpi" }, 1405 { NULL, NULL } 1406}; 1407 1408static int 1409idle_sysctl_available(SYSCTL_HANDLER_ARGS) 1410{ 1411 char *avail, *p; 1412 int error; 1413 int i; 1414 1415 avail = malloc(256, M_TEMP, M_WAITOK); 1416 p = avail; 1417 for (i = 0; idle_tbl[i].id_name != NULL; i++) { 1418 if (strstr(idle_tbl[i].id_name, "mwait") && 1419 (cpu_feature2 & CPUID2_MON) == 0) 1420 continue; 1421 if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 && 1422 cpu_ident_amdc1e == 0) 1423 continue; 1424 p += sprintf(p, "%s, ", idle_tbl[i].id_name); 1425 } 1426 error = sysctl_handle_string(oidp, avail, 0, req); 1427 free(avail, M_TEMP); 1428 return (error); 1429} 1430 1431static int 1432idle_sysctl(SYSCTL_HANDLER_ARGS) 1433{ 1434 char buf[16]; 1435 int error; 1436 char *p; 1437 int i; 1438 1439 p = "unknown"; 1440 for (i = 0; idle_tbl[i].id_name != NULL; i++) { 1441 if (idle_tbl[i].id_fn == cpu_idle_fn) { 1442 p = idle_tbl[i].id_name; 1443 break; 1444 } 1445 } 1446 strncpy(buf, p, sizeof(buf)); 1447 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 1448 if (error != 0 || req->newptr == NULL) 1449 return (error); 1450 for (i = 0; idle_tbl[i].id_name != NULL; i++) { 1451 if (strstr(idle_tbl[i].id_name, "mwait") && 1452 (cpu_feature2 & CPUID2_MON) == 0) 1453 continue; 1454 if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 && 1455 cpu_ident_amdc1e == 0) 1456 continue; 1457 if (strcmp(idle_tbl[i].id_name, buf)) 1458 continue; 1459 cpu_idle_fn = idle_tbl[i].id_fn; 1460 return (0); 1461 } 1462 return (EINVAL); 1463} 1464 1465SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 1466 0, 0, idle_sysctl_available, "A", "list of available idle functions"); 1467 1468SYSCTL_PROC(_machdep, OID_AUTO, idle, 
    CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
    idle_sysctl, "A", "currently selected idle function");

/*
 * Reset registers to default values on exec.
 *
 * Gives the newly exec'd process a clean machine state: fresh user
 * segment selectors, entry point in %eip, initial stack pointer,
 * cleared debug registers and dropped FPU state.  Any per-process
 * LDT inherited across exec is freed.
 */
void
exec_setregs(td, entry, stack, ps_strings)
	struct thread *td;
	u_long entry;
	u_long stack;
	u_long ps_strings;
{
	struct trapframe *regs = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	/* Reset pc->pcb_gs and %gs before possibly invalidating it. */
	pcb->pcb_gs = _udatasel;
	load_gs(_udatasel);

	/* user_ldt_free() drops dt_lock itself, hence the asymmetry. */
	mtx_lock_spin(&dt_lock);
	if (td->td_proc->p_md.md_ldt)
		user_ldt_free(td);
	else
		mtx_unlock_spin(&dt_lock);

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_eip = entry;
	regs->tf_esp = stack;
	/* Preserve only the trace flag across the bzero above. */
	regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_cs = _ucodesel;

	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
	regs->tf_ebx = ps_strings;

	/*
	 * Reset the hardware debug registers if they were in use.
	 * They won't have any meaning for the newly exec'd process.
	 */
	if (pcb->pcb_flags & PCB_DBREGS) {
		pcb->pcb_dr0 = 0;
		pcb->pcb_dr1 = 0;
		pcb->pcb_dr2 = 0;
		pcb->pcb_dr3 = 0;
		pcb->pcb_dr6 = 0;
		pcb->pcb_dr7 = 0;
		if (pcb == PCPU_GET(curpcb)) {
			/*
			 * Clear the debug registers on the running
			 * CPU, otherwise they will end up affecting
			 * the next process we switch to.
			 */
			reset_dbregs();
		}
		pcb->pcb_flags &= ~PCB_DBREGS;
	}

	/*
	 * Initialize the math emulator (if any) for the current process.
	 * Actually, just clear the bit that says that the emulator has
	 * been initialized.  Initialization is delayed until the process
	 * traps to the emulator (if it is done at all) mainly because
	 * emulators don't provide an entry point for initialization.
	 */
	td->td_pcb->pcb_flags &= ~FP_SOFTFP;
	pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;

	/*
	 * Drop the FP state if we hold it, so that the process gets a
	 * clean FP state if it uses the FPU again.
	 */
	fpstate_drop(td);

	/*
	 * XXX - Linux emulator
	 * Make sure sure edx is 0x0 on entry. Linux binaries depend
	 * on it.
	 */
	td->td_retval[1] = 0;
}

/*
 * Set the machine-dependent control register bits needed for FPU
 * trap handling, write protection and alignment checking, and load
 * the user data selector into %gs.
 */
void
cpu_setregs(void)
{
	unsigned int cr0;

	cr0 = rcr0();

	/*
	 * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support:
	 *
	 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
	 * instructions.  We must set the CR0_MP bit and use the CR0_TS
	 * bit to control the trap, because setting the CR0_EM bit does
	 * not cause WAIT instructions to trap.  It's important to trap
	 * WAIT instructions - otherwise the "wait" variants of no-wait
	 * control instructions would degenerate to the "no-wait" variants
	 * after FP context switches but work correctly otherwise.  It's
	 * particularly important to trap WAITs when there is no NPX -
	 * otherwise the "wait" variants would always degenerate.
	 *
	 * Try setting CR0_NE to get correct error reporting on 486DX's.
	 * Setting it should fail or do nothing on lesser processors.
	 */
	cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
	load_cr0(cr0);
	load_gs(_udatasel);
}

u_long bootdev;		/* not a struct cdev *- encoding is different */
SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev,
	CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)");

/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int _default_ldt;

#ifdef XEN
/* Under Xen the tables are allocated dynamically. */
union descriptor *gdt;
union descriptor *ldt;
#else
union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */
#endif
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
struct region_descriptor r_gdt, r_idt;	/* table descriptors */
struct mtx dt_lock;			/* lock for GDT and LDT */

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
#endif

/* Dedicated TSS and stack for taking double faults safely. */
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern  vm_offset_t	proc0kstack;


/*
 * software prototypes -- in more palatable form.
 *
 * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret
 * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it)
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GPRIV_SEL	1 SMP Per-Processor Private Data Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUFS_SEL	2 %fs Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUGS_SEL	3 %gs Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GCODE_SEL	4 Code Descriptor for kernel */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GDATA_SEL	5 Data Descriptor for kernel */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUCODE_SEL	6 Code Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUDATA_SEL	7 Data Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
{	.ssd_base = 0x400,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
#ifndef XEN
/* GPROC0_SEL	9 Proc 0 Tss Descriptor */
{
	.ssd_base = 0x0,
	.ssd_limit = sizeof(struct i386tss)-1,
	.ssd_type = SDT_SYS386TSS,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GLDT_SEL	10 LDT Descriptor */
{	.ssd_base = (int) ldt,
	.ssd_limit = sizeof(ldt)-1,
	.ssd_type = SDT_SYSLDT,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GUSERLDT_SEL	11 User LDT Descriptor per process */
{	.ssd_base = (int) ldt,
	.ssd_limit = (512 * sizeof(union descriptor)-1),
	.ssd_type = SDT_SYSLDT,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GPANIC_SEL	12 Panic Tss Descriptor */
{	.ssd_base = (int) &dblfault_tss,
	.ssd_limit = sizeof(struct i386tss)-1,
	.ssd_type = SDT_SYS386TSS,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */
{	.ssd_base = 0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = 0,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GNDIS_SEL	18 NDIS Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
#endif /* !XEN */
};

static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Code Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
	/* Null Descriptor - overwritten by call gate */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
	/* Data Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_xx = 0, .ssd_xx1 = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
};

/*
 * Install an IDT entry: point vector 'idx' at handler 'func' with
 * gate type 'typ', privilege 'dpl' and code selector 'selec'.  The
 * 32-bit handler offset is split across the low/high offset fields.
 */
void
setidt(idx, func, typ, dpl, selec)
	int idx;
	inthand_t *func;
	int typ;
	int dpl;
	int selec;
{
	struct gate_descriptor *ip;

	ip = idt + idx;
	ip->gd_looffset = (int)func;
	ip->gd_selector = selec;
	ip->gd_stkcpy = 0;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	ip->gd_hioffset = ((int)func)>>16 ;
}

extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);

#ifdef DDB
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.
 */
DB_SHOW_COMMAND(idt, db_show_idt)
{
	struct gate_descriptor *ip;
	int idx;
	uintptr_t func;

	ip = idt;
	for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
		/* Reassemble the handler address from the split fields. */
		func = (ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}

/* Show privileged registers.
 */
DB_SHOW_COMMAND(sysregs, db_show_sysregs)
{
	uint64_t idtr, gdtr;

	idtr = ridt();
	db_printf("idtr\t0x%08x/%04x\n",
	    (u_int)(idtr >> 16), (u_int)idtr & 0xffff);
	gdtr = rgdt();
	db_printf("gdtr\t0x%08x/%04x\n",
	    (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff);
	db_printf("ldtr\t0x%04x\n", rldt());
	db_printf("tr\t0x%04x\n", rtr());
	db_printf("cr0\t0x%08x\n", rcr0());
	db_printf("cr2\t0x%08x\n", rcr2());
	db_printf("cr3\t0x%08x\n", rcr3());
	db_printf("cr4\t0x%08x\n", rcr4());
}
#endif

/*
 * Convert a hardware segment descriptor into the "software prototype"
 * (soft_segment_descriptor) form, reassembling the split base and
 * limit fields.
 */
void
sdtossd(sd, ssd)
	struct segment_descriptor *sd;
	struct soft_segment_descriptor *ssd;
{
	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type  = sd->sd_type;
	ssd->ssd_dpl   = sd->sd_dpl;
	ssd->ssd_p     = sd->sd_p;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran  = sd->sd_gran;
}

/*
 * Append one BIOS SMAP (INT 15h E820) entry to the physmap base/bound
 * array, merging it into the previous segment when contiguous.
 * Returns 1 to continue scanning (including for entries that were
 * skipped), 0 when physmap is full and the caller should stop.
 *
 * NOTE(review): *physmap_idxp is advanced before the PHYSMAP_SIZE
 * bounds check below, so after a "giving up" return the caller's
 * index refers to an unwritten pair — verify callers tolerate this.
 */
static int
add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
{
	int i, physmap_idx;

	physmap_idx = *physmap_idxp;

	if (boothowto & RB_VERBOSE)
		printf("SMAP type=%02x base=%016llx len=%016llx\n",
		    smap->type, smap->base, smap->length);

	/* Only usable RAM entries contribute to physmap. */
	if (smap->type != SMAP_TYPE_MEMORY)
		return (1);

	if (smap->length == 0)
		return (1);

#ifndef PAE
	/* Without PAE, physical addresses beyond 4GB are unreachable. */
	if (smap->base >= 0xffffffff) {
		printf("%uK of memory above 4GB ignored\n",
		    (u_int)(smap->length / 1024));
		return (1);
	}
#endif

	for (i = 0; i <= physmap_idx; i += 2) {
		if (smap->base < physmap[i + 1]) {
			if (boothowto & RB_VERBOSE)
				printf(
	"Overlapping or non-monotonic memory region, ignoring second region\n");
			return (1);
		}
	}

	/* Contiguous with the previous segment: just extend it. */
	if (smap->base == physmap[physmap_idx + 1]) {
		physmap[physmap_idx + 1] += smap->length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYSMAP_SIZE) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}
	physmap[physmap_idx] = smap->base;
	physmap[physmap_idx + 1] = smap->base + smap->length;
	return (1);
}

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * If we cannot accurately determine the physical memory map, then use
 * value from the 0xE801 call, and failing that, the RTC.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(int first)
{
	int i, off, physmap_idx, pa_indx, da_indx;
	int hasbrokenint12, has_smap;
	u_long physmem_tunable;
	u_int extmem;
	struct vm86frame vmf;
	struct vm86context vmc;
	vm_paddr_t pa, physmap[PHYSMAP_SIZE];
	pt_entry_t *pte;
	struct bios_smap *smap, *smapbase, *smapend;
	u_int32_t smapsize;
	quad_t dcons_addr, dcons_size;
	caddr_t kmdp;

	has_smap = 0;
#ifdef XBOX
	if (arch_i386_is_xbox) {
		/*
		 * We queried the memory size before, so chop off 4MB for
		 * the framebuffer and inform the OS of this.
2028 */ 2029 physmap[0] = 0; 2030 physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; 2031 physmap_idx = 0; 2032 goto physmap_done; 2033 } 2034#endif 2035#if defined(XEN) 2036 has_smap = 0; 2037 Maxmem = xen_start_info->nr_pages - init_first; 2038 physmem = Maxmem; 2039 basemem = 0; 2040 physmap[0] = init_first << PAGE_SHIFT; 2041 physmap[1] = ptoa(Maxmem) - round_page(MSGBUF_SIZE); 2042 physmap_idx = 0; 2043 goto physmap_done; 2044#endif 2045 hasbrokenint12 = 0; 2046 TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); 2047 bzero(&vmf, sizeof(vmf)); 2048 bzero(physmap, sizeof(physmap)); 2049 basemem = 0; 2050 2051 /* 2052 * Some newer BIOSes has broken INT 12H implementation which cause 2053 * kernel panic immediately. In this case, we need to scan SMAP 2054 * with INT 15:E820 first, then determine base memory size. 2055 */ 2056 if (hasbrokenint12) { 2057 goto int15e820; 2058 } 2059 2060 /* 2061 * Perform "base memory" related probes & setup 2062 */ 2063 vm86_intcall(0x12, &vmf); 2064 basemem = vmf.vmf_ax; 2065 if (basemem > 640) { 2066 printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", 2067 basemem); 2068 basemem = 640; 2069 } 2070 2071 /* 2072 * XXX if biosbasemem is now < 640, there is a `hole' 2073 * between the end of base memory and the start of 2074 * ISA memory. The hole may be empty or it may 2075 * contain BIOS code or data. Map it read/write so 2076 * that the BIOS can write to it. (Memory from 0 to 2077 * the physical end of the kernel is mapped read-only 2078 * to begin with and then parts of it are remapped. 2079 * The parts that aren't remapped form holes that 2080 * remain read-only and are unused by the kernel. 2081 * The base memory area is below the physical end of 2082 * the kernel and right now forms a read-only hole. 2083 * The part of it from PAGE_SIZE to 2084 * (trunc_page(biosbasemem * 1024) - 1) will be 2085 * remapped and used by the kernel later.) 
2086 * 2087 * This code is similar to the code used in 2088 * pmap_mapdev, but since no memory needs to be 2089 * allocated we simply change the mapping. 2090 */ 2091 for (pa = trunc_page(basemem * 1024); 2092 pa < ISA_HOLE_START; pa += PAGE_SIZE) 2093 pmap_kenter(KERNBASE + pa, pa); 2094 2095 /* 2096 * Map pages between basemem and ISA_HOLE_START, if any, r/w into 2097 * the vm86 page table so that vm86 can scribble on them using 2098 * the vm86 map too. XXX: why 2 ways for this and only 1 way for 2099 * page 0, at least as initialized here? 2100 */ 2101 pte = (pt_entry_t *)vm86paddr; 2102 for (i = basemem / 4; i < 160; i++) 2103 pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; 2104 2105int15e820: 2106 /* 2107 * Fetch the memory map with INT 15:E820. First, check to see 2108 * if the loader supplied it and use that if so. Otherwise, 2109 * use vm86 to invoke the BIOS call directly. 2110 */ 2111 physmap_idx = 0; 2112 smapbase = NULL; 2113 kmdp = preload_search_by_type("elf kernel"); 2114 if (kmdp == NULL) 2115 kmdp = preload_search_by_type("elf32 kernel"); 2116 if (kmdp != NULL) 2117 smapbase = (struct bios_smap *)preload_search_info(kmdp, 2118 MODINFO_METADATA | MODINFOMD_SMAP); 2119 if (smapbase != NULL) { 2120 /* subr_module.c says: 2121 * "Consumer may safely assume that size value precedes data." 2122 * ie: an int32_t immediately precedes smap. 2123 */ 2124 smapsize = *((u_int32_t *)smapbase - 1); 2125 smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); 2126 has_smap = 1; 2127 2128 for (smap = smapbase; smap < smapend; smap++) 2129 if (!add_smap_entry(smap, physmap, &physmap_idx)) 2130 break; 2131 } else { 2132 /* 2133 * map page 1 R/W into the kernel page table so we can use it 2134 * as a buffer. The kernel will unmap this page later. 
2135 */ 2136 pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); 2137 vmc.npages = 0; 2138 smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + 2139 (1 << PAGE_SHIFT)); 2140 vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); 2141 2142 vmf.vmf_ebx = 0; 2143 do { 2144 vmf.vmf_eax = 0xE820; 2145 vmf.vmf_edx = SMAP_SIG; 2146 vmf.vmf_ecx = sizeof(struct bios_smap); 2147 i = vm86_datacall(0x15, &vmf, &vmc); 2148 if (i || vmf.vmf_eax != SMAP_SIG) 2149 break; 2150 has_smap = 1; 2151 if (!add_smap_entry(smap, physmap, &physmap_idx)) 2152 break; 2153 } while (vmf.vmf_ebx != 0); 2154 } 2155 2156 /* 2157 * Perform "base memory" related probes & setup based on SMAP 2158 */ 2159 if (basemem == 0) { 2160 for (i = 0; i <= physmap_idx; i += 2) { 2161 if (physmap[i] == 0x00000000) { 2162 basemem = physmap[i + 1] / 1024; 2163 break; 2164 } 2165 } 2166 2167 /* 2168 * XXX this function is horribly organized and has to the same 2169 * things that it does above here. 2170 */ 2171 if (basemem == 0) 2172 basemem = 640; 2173 if (basemem > 640) { 2174 printf( 2175 "Preposterous BIOS basemem of %uK, truncating to 640K\n", 2176 basemem); 2177 basemem = 640; 2178 } 2179 2180 /* 2181 * Let vm86 scribble on pages between basemem and 2182 * ISA_HOLE_START, as above. 2183 */ 2184 for (pa = trunc_page(basemem * 1024); 2185 pa < ISA_HOLE_START; pa += PAGE_SIZE) 2186 pmap_kenter(KERNBASE + pa, pa); 2187 pte = (pt_entry_t *)vm86paddr; 2188 for (i = basemem / 4; i < 160; i++) 2189 pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; 2190 } 2191 2192 if (physmap[1] != 0) 2193 goto physmap_done; 2194 2195 /* 2196 * If we failed above, try memory map with INT 15:E801 2197 */ 2198 vmf.vmf_ax = 0xE801; 2199 if (vm86_intcall(0x15, &vmf) == 0) { 2200 extmem = vmf.vmf_cx + vmf.vmf_dx * 64; 2201 } else { 2202#if 0 2203 vmf.vmf_ah = 0x88; 2204 vm86_intcall(0x15, &vmf); 2205 extmem = vmf.vmf_ax; 2206#elif !defined(XEN) 2207 /* 2208 * Prefer the RTC value for extended memory. 
2209 */ 2210 extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); 2211#endif 2212 } 2213 2214 /* 2215 * Special hack for chipsets that still remap the 384k hole when 2216 * there's 16MB of memory - this really confuses people that 2217 * are trying to use bus mastering ISA controllers with the 2218 * "16MB limit"; they only have 16MB, but the remapping puts 2219 * them beyond the limit. 2220 * 2221 * If extended memory is between 15-16MB (16-17MB phys address range), 2222 * chop it to 15MB. 2223 */ 2224 if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) 2225 extmem = 15 * 1024; 2226 2227 physmap[0] = 0; 2228 physmap[1] = basemem * 1024; 2229 physmap_idx = 2; 2230 physmap[physmap_idx] = 0x100000; 2231 physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; 2232 2233physmap_done: 2234 /* 2235 * Now, physmap contains a map of physical memory. 2236 */ 2237 2238#ifdef SMP 2239 /* make hole for AP bootstrap code */ 2240 physmap[1] = mp_bootaddress(physmap[1]); 2241#endif 2242 2243 /* 2244 * Maxmem isn't the "maximum memory", it's one larger than the 2245 * highest page of the physical address space. It should be 2246 * called something like "Maxphyspage". We may adjust this 2247 * based on ``hw.physmem'' and the results of the memory test. 2248 */ 2249 Maxmem = atop(physmap[physmap_idx + 1]); 2250 2251#ifdef MAXMEM 2252 Maxmem = MAXMEM / 4; 2253#endif 2254 2255 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) 2256 Maxmem = atop(physmem_tunable); 2257 2258 /* 2259 * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend 2260 * the amount of memory in the system. 
2261 */ 2262 if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) 2263 Maxmem = atop(physmap[physmap_idx + 1]); 2264 2265 if (atop(physmap[physmap_idx + 1]) != Maxmem && 2266 (boothowto & RB_VERBOSE)) 2267 printf("Physical memory use set to %ldK\n", Maxmem * 4); 2268 2269 /* 2270 * If Maxmem has been increased beyond what the system has detected, 2271 * extend the last memory segment to the new limit. 2272 */ 2273 if (atop(physmap[physmap_idx + 1]) < Maxmem) 2274 physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); 2275 2276 /* call pmap initialization to make new kernel address space */ 2277 pmap_bootstrap(first); 2278 2279 /* 2280 * Size up each available chunk of physical memory. 2281 */ 2282 physmap[0] = PAGE_SIZE; /* mask off page 0 */ 2283 pa_indx = 0; 2284 da_indx = 1; 2285 phys_avail[pa_indx++] = physmap[0]; 2286 phys_avail[pa_indx] = physmap[0]; 2287 dump_avail[da_indx] = physmap[0]; 2288 pte = CMAP1; 2289 2290 /* 2291 * Get dcons buffer address 2292 */ 2293 if (getenv_quad("dcons.addr", &dcons_addr) == 0 || 2294 getenv_quad("dcons.size", &dcons_size) == 0) 2295 dcons_addr = 0; 2296 2297#ifndef XEN 2298 /* 2299 * physmap is in bytes, so when converting to page boundaries, 2300 * round up the start address and round down the end address. 2301 */ 2302 for (i = 0; i <= physmap_idx; i += 2) { 2303 vm_paddr_t end; 2304 2305 end = ptoa((vm_paddr_t)Maxmem); 2306 if (physmap[i + 1] < end) 2307 end = trunc_page(physmap[i + 1]); 2308 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { 2309 int tmp, page_bad, full; 2310 int *ptr = (int *)CADDR1; 2311 2312 full = FALSE; 2313 /* 2314 * block out kernel memory as not available. 
2315 */ 2316 if (pa >= KERNLOAD && pa < first) 2317 goto do_dump_avail; 2318 2319 /* 2320 * block out dcons buffer 2321 */ 2322 if (dcons_addr > 0 2323 && pa >= trunc_page(dcons_addr) 2324 && pa < dcons_addr + dcons_size) 2325 goto do_dump_avail; 2326 2327 page_bad = FALSE; 2328 2329 /* 2330 * map page into kernel: valid, read/write,non-cacheable 2331 */ 2332 *pte = pa | PG_V | PG_RW | PG_N; 2333 invltlb(); 2334 2335 tmp = *(int *)ptr; 2336 /* 2337 * Test for alternating 1's and 0's 2338 */ 2339 *(volatile int *)ptr = 0xaaaaaaaa; 2340 if (*(volatile int *)ptr != 0xaaaaaaaa) 2341 page_bad = TRUE; 2342 /* 2343 * Test for alternating 0's and 1's 2344 */ 2345 *(volatile int *)ptr = 0x55555555; 2346 if (*(volatile int *)ptr != 0x55555555) 2347 page_bad = TRUE; 2348 /* 2349 * Test for all 1's 2350 */ 2351 *(volatile int *)ptr = 0xffffffff; 2352 if (*(volatile int *)ptr != 0xffffffff) 2353 page_bad = TRUE; 2354 /* 2355 * Test for all 0's 2356 */ 2357 *(volatile int *)ptr = 0x0; 2358 if (*(volatile int *)ptr != 0x0) 2359 page_bad = TRUE; 2360 /* 2361 * Restore original value. 2362 */ 2363 *(int *)ptr = tmp; 2364 2365 /* 2366 * Adjust array of valid/good pages. 2367 */ 2368 if (page_bad == TRUE) 2369 continue; 2370 /* 2371 * If this good page is a continuation of the 2372 * previous set of good pages, then just increase 2373 * the end pointer. Otherwise start a new chunk. 2374 * Note that "end" points one higher than end, 2375 * making the range >= start and < end. 2376 * If we're also doing a speculative memory 2377 * test and we at or past the end, bump up Maxmem 2378 * so that we keep going. The first bad page 2379 * will terminate the loop. 
2380 */ 2381 if (phys_avail[pa_indx] == pa) { 2382 phys_avail[pa_indx] += PAGE_SIZE; 2383 } else { 2384 pa_indx++; 2385 if (pa_indx == PHYS_AVAIL_ARRAY_END) { 2386 printf( 2387 "Too many holes in the physical address space, giving up\n"); 2388 pa_indx--; 2389 full = TRUE; 2390 goto do_dump_avail; 2391 } 2392 phys_avail[pa_indx++] = pa; /* start */ 2393 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ 2394 } 2395 physmem++; 2396do_dump_avail: 2397 if (dump_avail[da_indx] == pa) { 2398 dump_avail[da_indx] += PAGE_SIZE; 2399 } else { 2400 da_indx++; 2401 if (da_indx == DUMP_AVAIL_ARRAY_END) { 2402 da_indx--; 2403 goto do_next; 2404 } 2405 dump_avail[da_indx++] = pa; /* start */ 2406 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ 2407 } 2408do_next: 2409 if (full) 2410 break; 2411 } 2412 } 2413 *pte = 0; 2414 invltlb(); 2415#else 2416 phys_avail[0] = physfree; 2417 phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; 2418#endif 2419 2420 /* 2421 * XXX 2422 * The last chunk must contain at least one page plus the message 2423 * buffer to avoid complicating other code (message buffer address 2424 * calculation, etc.). 2425 */ 2426 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 2427 round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { 2428 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 2429 phys_avail[pa_indx--] = 0; 2430 phys_avail[pa_indx--] = 0; 2431 } 2432 2433 Maxmem = atop(phys_avail[pa_indx]); 2434 2435 /* Trim off space for the message buffer. */ 2436 phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); 2437 2438 /* Map the message buffer. 
*/ 2439 for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) 2440 pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + 2441 off); 2442 2443 PT_UPDATES_FLUSH(); 2444} 2445 2446#ifdef XEN 2447#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) 2448 2449void 2450init386(first) 2451 int first; 2452{ 2453 unsigned long gdtmachpfn; 2454 int error, gsel_tss, metadata_missing, x, pa; 2455 struct pcpu *pc; 2456 struct callback_register event = { 2457 .type = CALLBACKTYPE_event, 2458 .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback }, 2459 }; 2460 struct callback_register failsafe = { 2461 .type = CALLBACKTYPE_failsafe, 2462 .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback }, 2463 }; 2464 2465 thread0.td_kstack = proc0kstack; 2466 thread0.td_pcb = (struct pcb *) 2467 (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; 2468 2469 /* 2470 * This may be done better later if it gets more high level 2471 * components in it. If so just link td->td_proc here. 
2472 */ 2473 proc_linkup0(&proc0, &thread0); 2474 2475 metadata_missing = 0; 2476 if (xen_start_info->mod_start) { 2477 preload_metadata = (caddr_t)xen_start_info->mod_start; 2478 preload_bootstrap_relocate(KERNBASE); 2479 } else { 2480 metadata_missing = 1; 2481 } 2482 if (envmode == 1) 2483 kern_envp = static_env; 2484 else if ((caddr_t)xen_start_info->cmd_line) 2485 kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); 2486 2487 boothowto |= xen_boothowto(kern_envp); 2488 2489 /* Init basic tunables, hz etc */ 2490 init_param1(); 2491 2492 /* 2493 * XEN occupies a portion of the upper virtual address space 2494 * At its base it manages an array mapping machine page frames 2495 * to physical page frames - hence we need to be able to 2496 * access 4GB - (64MB - 4MB + 64k) 2497 */ 2498 gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); 2499 gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); 2500 gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); 2501 gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); 2502 gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); 2503 gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); 2504 gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); 2505 gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); 2506 2507 pc = &__pcpu[0]; 2508 gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 2509 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 2510 2511 PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW); 2512 bzero(gdt, PAGE_SIZE); 2513 for (x = 0; x < NGDT; x++) 2514 ssdtosd(&gdt_segs[x], &gdt[x].sd); 2515 2516 mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); 2517 2518 gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; 2519 PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V); 2520 PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); 2521 lgdt(&r_gdt); 2522 gdtset = 1; 2523 2524 if 
((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { 2525 panic("set_trap_table failed - error %d\n", error); 2526 } 2527 2528 error = HYPERVISOR_callback_op(CALLBACKOP_register, &event); 2529 if (error == 0) 2530 error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); 2531#if CONFIG_XEN_COMPAT <= 0x030002 2532 if (error == -ENOXENSYS) 2533 HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), 2534 (unsigned long)Xhypervisor_callback, 2535 GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); 2536#endif 2537 pcpu_init(pc, 0, sizeof(struct pcpu)); 2538 for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) 2539 pmap_kenter(pa + KERNBASE, pa); 2540 dpcpu_init((void *)(first + KERNBASE), 0); 2541 first += DPCPU_SIZE; 2542 2543 PCPU_SET(prvspace, pc); 2544 PCPU_SET(curthread, &thread0); 2545 PCPU_SET(curpcb, thread0.td_pcb); 2546 2547 /* 2548 * Initialize mutexes. 2549 * 2550 * icu_lock: in order to allow an interrupt to occur in a critical 2551 * section, to set pcpu->ipending (etc...) properly, we 2552 * must be able to get the icu lock, so it can't be 2553 * under witness. 
2554 */ 2555 mutex_init(); 2556 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); 2557 2558 /* make ldt memory segments */ 2559 PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW); 2560 bzero(ldt, PAGE_SIZE); 2561 ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); 2562 ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); 2563 for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) 2564 ssdtosd(&ldt_segs[x], &ldt[x].sd); 2565 2566 default_proc_ldt.ldt_base = (caddr_t)ldt; 2567 default_proc_ldt.ldt_len = 6; 2568 _default_ldt = (int)&default_proc_ldt; 2569 PCPU_SET(currentldt, _default_ldt) 2570 PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); 2571 xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); 2572 2573#if defined(XEN_PRIVILEGED) 2574 /* 2575 * Initialize the i8254 before the console so that console 2576 * initialization can use DELAY(). 2577 */ 2578 i8254_init(); 2579#endif 2580 2581 /* 2582 * Initialize the console before we print anything out. 2583 */ 2584 cninit(); 2585 2586 if (metadata_missing) 2587 printf("WARNING: loader(8) metadata is missing!\n"); 2588 2589#ifdef DEV_ISA 2590 elcr_probe(); 2591 atpic_startup(); 2592#endif 2593 2594#ifdef DDB 2595 ksym_start = bootinfo.bi_symtab; 2596 ksym_end = bootinfo.bi_esymtab; 2597#endif 2598 2599 kdb_init(); 2600 2601#ifdef KDB 2602 if (boothowto & RB_KDB) 2603 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); 2604#endif 2605 2606 finishidentcpu(); /* Final stage of CPU initialization */ 2607 setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, 2608 GSEL(GCODE_SEL, SEL_KPL)); 2609 setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, 2610 GSEL(GCODE_SEL, SEL_KPL)); 2611 initializecpu(); /* Initialize CPU registers */ 2612 2613 /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ 2614 /* Note: -16 is so we can grow the trapframe if we came from vm86 */ 2615 PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + 2616 KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); 2617 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 2618 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 2619 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), 2620 PCPU_GET(common_tss.tss_esp0)); 2621 2622 /* pointer to selector slot for %fs/%gs */ 2623 PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 2624 2625 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = 2626 dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; 2627 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = 2628 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); 2629#ifdef PAE 2630 dblfault_tss.tss_cr3 = (int)IdlePDPT; 2631#else 2632 dblfault_tss.tss_cr3 = (int)IdlePTD; 2633#endif 2634 dblfault_tss.tss_eip = (int)dblfault_handler; 2635 dblfault_tss.tss_eflags = PSL_KERNEL; 2636 dblfault_tss.tss_ds = dblfault_tss.tss_es = 2637 dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); 2638 dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); 2639 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); 2640 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 2641 2642 vm86_initialize(); 2643 getmemsize(first); 2644 init_param2(physmem); 2645 2646 /* now running on new page tables, configured,and u/iom is accessible */ 2647 2648 msgbufinit(msgbufp, MSGBUF_SIZE); 2649 /* transfer to user mode */ 2650 2651 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); 2652 _udatasel = GSEL(GUDATA_SEL, SEL_UPL); 2653 2654 /* setup proc 0's pcb */ 2655 thread0.td_pcb->pcb_flags = 0; 2656#ifdef PAE 2657 thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; 2658#else 2659 thread0.td_pcb->pcb_cr3 = (int)IdlePTD; 2660#endif 2661 thread0.td_pcb->pcb_ext = 0; 2662 thread0.td_frame = &proc0_tf; 2663 thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0]; 2664 thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; 2665 2666 if (cpu_probe_amdc1e()) 2667 
cpu_idle_fn = cpu_idle_amdc1e; 2668} 2669 2670#else 2671void 2672init386(first) 2673 int first; 2674{ 2675 struct gate_descriptor *gdp; 2676 int gsel_tss, metadata_missing, x, pa; 2677 struct pcpu *pc; 2678 2679 thread0.td_kstack = proc0kstack; 2680 thread0.td_pcb = (struct pcb *) 2681 (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; 2682 2683 /* 2684 * This may be done better later if it gets more high level 2685 * components in it. If so just link td->td_proc here. 2686 */ 2687 proc_linkup0(&proc0, &thread0); 2688 2689 metadata_missing = 0; 2690 if (bootinfo.bi_modulep) { 2691 preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; 2692 preload_bootstrap_relocate(KERNBASE); 2693 } else { 2694 metadata_missing = 1; 2695 } 2696 if (envmode == 1) 2697 kern_envp = static_env; 2698 else if (bootinfo.bi_envp) 2699 kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; 2700 2701 /* Init basic tunables, hz etc */ 2702 init_param1(); 2703 2704 /* 2705 * Make gdt memory segments. All segments cover the full 4GB 2706 * of address space and permissions are enforced at page level. 
2707 */ 2708 gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); 2709 gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); 2710 gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); 2711 gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); 2712 gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); 2713 gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); 2714 2715 pc = &__pcpu[0]; 2716 gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); 2717 gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 2718 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 2719 2720 for (x = 0; x < NGDT; x++) 2721 ssdtosd(&gdt_segs[x], &gdt[x].sd); 2722 2723 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 2724 r_gdt.rd_base = (int) gdt; 2725 mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); 2726 lgdt(&r_gdt); 2727 2728 pcpu_init(pc, 0, sizeof(struct pcpu)); 2729 for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) 2730 pmap_kenter(pa + KERNBASE, pa); 2731 dpcpu_init((void *)(first + KERNBASE), 0); 2732 first += DPCPU_SIZE; 2733 PCPU_SET(prvspace, pc); 2734 PCPU_SET(curthread, &thread0); 2735 PCPU_SET(curpcb, thread0.td_pcb); 2736 2737 /* 2738 * Initialize mutexes. 2739 * 2740 * icu_lock: in order to allow an interrupt to occur in a critical 2741 * section, to set pcpu->ipending (etc...) properly, we 2742 * must be able to get the icu lock, so it can't be 2743 * under witness. 
2744 */ 2745 mutex_init(); 2746 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); 2747 2748 /* make ldt memory segments */ 2749 ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); 2750 ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); 2751 for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) 2752 ssdtosd(&ldt_segs[x], &ldt[x].sd); 2753 2754 _default_ldt = GSEL(GLDT_SEL, SEL_KPL); 2755 lldt(_default_ldt); 2756 PCPU_SET(currentldt, _default_ldt); 2757 2758 /* exceptions */ 2759 for (x = 0; x < NIDT; x++) 2760 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, 2761 GSEL(GCODE_SEL, SEL_KPL)); 2762 setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, 2763 GSEL(GCODE_SEL, SEL_KPL)); 2764 setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, 2765 GSEL(GCODE_SEL, SEL_KPL)); 2766 setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, 2767 GSEL(GCODE_SEL, SEL_KPL)); 2768 setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, 2769 GSEL(GCODE_SEL, SEL_KPL)); 2770 setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, 2771 GSEL(GCODE_SEL, SEL_KPL)); 2772 setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, 2773 GSEL(GCODE_SEL, SEL_KPL)); 2774 setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, 2775 GSEL(GCODE_SEL, SEL_KPL)); 2776 setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL 2777 , GSEL(GCODE_SEL, SEL_KPL)); 2778 setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); 2779 setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, 2780 GSEL(GCODE_SEL, SEL_KPL)); 2781 setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, 2782 GSEL(GCODE_SEL, SEL_KPL)); 2783 setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, 2784 GSEL(GCODE_SEL, SEL_KPL)); 2785 setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, 2786 GSEL(GCODE_SEL, SEL_KPL)); 2787 setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, 2788 GSEL(GCODE_SEL, SEL_KPL)); 2789 setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, 2790 GSEL(GCODE_SEL, SEL_KPL)); 2791 setidt(IDT_MF, &IDTVEC(fpu), 
SDT_SYS386TGT, SEL_KPL, 2792 GSEL(GCODE_SEL, SEL_KPL)); 2793 setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, 2794 GSEL(GCODE_SEL, SEL_KPL)); 2795 setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, 2796 GSEL(GCODE_SEL, SEL_KPL)); 2797 setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, 2798 GSEL(GCODE_SEL, SEL_KPL)); 2799 setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, 2800 GSEL(GCODE_SEL, SEL_KPL)); 2801 2802 r_idt.rd_limit = sizeof(idt0) - 1; 2803 r_idt.rd_base = (int) idt; 2804 lidt(&r_idt); 2805 2806#ifdef XBOX 2807 /* 2808 * The following code queries the PCI ID of 0:0:0. For the XBOX, 2809 * This should be 0x10de / 0x02a5. 2810 * 2811 * This is exactly what Linux does. 2812 */ 2813 outl(0xcf8, 0x80000000); 2814 if (inl(0xcfc) == 0x02a510de) { 2815 arch_i386_is_xbox = 1; 2816 pic16l_setled(XBOX_LED_GREEN); 2817 2818 /* 2819 * We are an XBOX, but we may have either 64MB or 128MB of 2820 * memory. The PCI host bridge should be programmed for this, 2821 * so we just query it. 2822 */ 2823 outl(0xcf8, 0x80000084); 2824 arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; 2825 } 2826#endif /* XBOX */ 2827 2828 /* 2829 * Initialize the i8254 before the console so that console 2830 * initialization can use DELAY(). 2831 */ 2832 i8254_init(); 2833 2834 /* 2835 * Initialize the console before we print anything out. 
2836 */ 2837 cninit(); 2838 2839 if (metadata_missing) 2840 printf("WARNING: loader(8) metadata is missing!\n"); 2841 2842#ifdef DEV_ISA 2843 elcr_probe(); 2844 atpic_startup(); 2845#endif 2846 2847#ifdef DDB 2848 ksym_start = bootinfo.bi_symtab; 2849 ksym_end = bootinfo.bi_esymtab; 2850#endif 2851 2852 kdb_init(); 2853 2854#ifdef KDB 2855 if (boothowto & RB_KDB) 2856 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); 2857#endif 2858 2859 finishidentcpu(); /* Final stage of CPU initialization */ 2860 setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, 2861 GSEL(GCODE_SEL, SEL_KPL)); 2862 setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, 2863 GSEL(GCODE_SEL, SEL_KPL)); 2864 initializecpu(); /* Initialize CPU registers */ 2865 2866 /* make an initial tss so cpu can get interrupt stack on syscall! */ 2867 /* Note: -16 is so we can grow the trapframe if we came from vm86 */ 2868 PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + 2869 KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); 2870 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 2871 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 2872 PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); 2873 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 2874 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 2875 ltr(gsel_tss); 2876 2877 /* pointer to selector slot for %fs/%gs */ 2878 PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 2879 2880 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = 2881 dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; 2882 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = 2883 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); 2884#ifdef PAE 2885 dblfault_tss.tss_cr3 = (int)IdlePDPT; 2886#else 2887 dblfault_tss.tss_cr3 = (int)IdlePTD; 2888#endif 2889 dblfault_tss.tss_eip = (int)dblfault_handler; 2890 dblfault_tss.tss_eflags = PSL_KERNEL; 2891 dblfault_tss.tss_ds = dblfault_tss.tss_es = 2892 dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); 
2893 dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); 2894 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); 2895 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 2896 2897 vm86_initialize(); 2898 getmemsize(first); 2899 init_param2(physmem); 2900 2901 /* now running on new page tables, configured,and u/iom is accessible */ 2902 2903 msgbufinit(msgbufp, MSGBUF_SIZE); 2904 2905 /* make a call gate to reenter kernel with */ 2906 gdp = &ldt[LSYS5CALLS_SEL].gd; 2907 2908 x = (int) &IDTVEC(lcall_syscall); 2909 gdp->gd_looffset = x; 2910 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); 2911 gdp->gd_stkcpy = 1; 2912 gdp->gd_type = SDT_SYS386CGT; 2913 gdp->gd_dpl = SEL_UPL; 2914 gdp->gd_p = 1; 2915 gdp->gd_hioffset = x >> 16; 2916 2917 /* XXX does this work? */ 2918 /* XXX yes! */ 2919 ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; 2920 ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; 2921 2922 /* transfer to user mode */ 2923 2924 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); 2925 _udatasel = GSEL(GUDATA_SEL, SEL_UPL); 2926 2927 /* setup proc 0's pcb */ 2928 thread0.td_pcb->pcb_flags = 0; 2929#ifdef PAE 2930 thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; 2931#else 2932 thread0.td_pcb->pcb_cr3 = (int)IdlePTD; 2933#endif 2934 thread0.td_pcb->pcb_ext = 0; 2935 thread0.td_frame = &proc0_tf; 2936 2937 if (cpu_probe_amdc1e()) 2938 cpu_idle_fn = cpu_idle_amdc1e; 2939} 2940#endif 2941 2942void 2943cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) 2944{ 2945 2946 pcpu->pc_acpi_id = 0xffffffff; 2947} 2948 2949void 2950spinlock_enter(void) 2951{ 2952 struct thread *td; 2953 2954 td = curthread; 2955 if (td->td_md.md_spinlock_count == 0) 2956 td->td_md.md_saved_flags = intr_disable(); 2957 td->td_md.md_spinlock_count++; 2958 critical_enter(); 2959} 2960 2961void 2962spinlock_exit(void) 2963{ 2964 struct thread *td; 2965 2966 td = curthread; 2967 critical_exit(); 2968 td->td_md.md_spinlock_count--; 2969 if (td->td_md.md_spinlock_count == 0) 2970 intr_restore(td->td_md.md_saved_flags); 2971} 2972 2973#if 
defined(I586_CPU) && !defined(NO_F00F_HACK) 2974static void f00f_hack(void *unused); 2975SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); 2976 2977static void 2978f00f_hack(void *unused) 2979{ 2980 struct gate_descriptor *new_idt; 2981 vm_offset_t tmp; 2982 2983 if (!has_f00f_bug) 2984 return; 2985 2986 GIANT_REQUIRED; 2987 2988 printf("Intel Pentium detected, installing workaround for F00F bug\n"); 2989 2990 tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); 2991 if (tmp == 0) 2992 panic("kmem_alloc returned 0"); 2993 2994 /* Put the problematic entry (#6) at the end of the lower page. */ 2995 new_idt = (struct gate_descriptor*) 2996 (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); 2997 bcopy(idt, new_idt, sizeof(idt0)); 2998 r_idt.rd_base = (u_int)new_idt; 2999 lidt(&r_idt); 3000 idt = new_idt; 3001 if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, 3002 VM_PROT_READ, FALSE) != KERN_SUCCESS) 3003 panic("vm_map_protect failed"); 3004} 3005#endif /* defined(I586_CPU) && !NO_F00F_HACK */ 3006 3007/* 3008 * Construct a PCB from a trapframe. This is called from kdb_trap() where 3009 * we want to start a backtrace from the function that caused us to enter 3010 * the debugger. We have the context in the trapframe, but base the trace 3011 * on the PCB. The PCB doesn't have to be perfect, as long as it contains 3012 * enough for a backtrace. 3013 */ 3014void 3015makectx(struct trapframe *tf, struct pcb *pcb) 3016{ 3017 3018 pcb->pcb_edi = tf->tf_edi; 3019 pcb->pcb_esi = tf->tf_esi; 3020 pcb->pcb_ebp = tf->tf_ebp; 3021 pcb->pcb_ebx = tf->tf_ebx; 3022 pcb->pcb_eip = tf->tf_eip; 3023 pcb->pcb_esp = (ISPL(tf->tf_cs)) ? 
/*
 * Debugger / ptrace(2) support:
 *  - ptrace_set_pc(): point the thread's saved user %eip at `addr`.
 *  - ptrace_single_step() / ptrace_clear_single_step(): set/clear PSL_T
 *    (the x86 trap flag) in the saved %eflags to toggle hardware
 *    single-stepping.
 *  - fill_regs(): copy the user-visible register set from the trapframe
 *    (plus pcb_gs from the pcb) out to `struct reg`; always returns 0.
 *  - set_regs(): copy `struct reg` back into the trapframe, first
 *    rejecting with EINVAL any %eflags/%cs values that fail the
 *    EFL_SECURE()/CS_SECURE() checks, so a debugger cannot hand the
 *    thread privileged flag bits or a kernel code selector.
 */
tf->tf_esp : (int)(tf + 1) - 8; 3024} 3025 3026int 3027ptrace_set_pc(struct thread *td, u_long addr) 3028{ 3029 3030 td->td_frame->tf_eip = addr; 3031 return (0); 3032} 3033 3034int 3035ptrace_single_step(struct thread *td) 3036{ 3037 td->td_frame->tf_eflags |= PSL_T; 3038 return (0); 3039} 3040 3041int 3042ptrace_clear_single_step(struct thread *td) 3043{ 3044 td->td_frame->tf_eflags &= ~PSL_T; 3045 return (0); 3046} 3047 3048int 3049fill_regs(struct thread *td, struct reg *regs) 3050{ 3051 struct pcb *pcb; 3052 struct trapframe *tp; 3053 3054 tp = td->td_frame; 3055 pcb = td->td_pcb; 3056 regs->r_fs = tp->tf_fs; 3057 regs->r_es = tp->tf_es; 3058 regs->r_ds = tp->tf_ds; 3059 regs->r_edi = tp->tf_edi; 3060 regs->r_esi = tp->tf_esi; 3061 regs->r_ebp = tp->tf_ebp; 3062 regs->r_ebx = tp->tf_ebx; 3063 regs->r_edx = tp->tf_edx; 3064 regs->r_ecx = tp->tf_ecx; 3065 regs->r_eax = tp->tf_eax; 3066 regs->r_eip = tp->tf_eip; 3067 regs->r_cs = tp->tf_cs; 3068 regs->r_eflags = tp->tf_eflags; 3069 regs->r_esp = tp->tf_esp; 3070 regs->r_ss = tp->tf_ss; 3071 regs->r_gs = pcb->pcb_gs; 3072 return (0); 3073} 3074 3075int 3076set_regs(struct thread *td, struct reg *regs) 3077{ 3078 struct pcb *pcb; 3079 struct trapframe *tp; 3080 3081 tp = td->td_frame; 3082 if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || 3083 !CS_SECURE(regs->r_cs)) 3084 return (EINVAL); 3085 pcb = td->td_pcb; 3086 tp->tf_fs = regs->r_fs; 3087 tp->tf_es = regs->r_es; 3088 tp->tf_ds = regs->r_ds; 3089 tp->tf_edi = regs->r_edi; 3090 tp->tf_esi = regs->r_esi; 3091 tp->tf_ebp = regs->r_ebp; 3092 tp->tf_ebx = regs->r_ebx; 3093 tp->tf_edx = regs->r_edx; 3094 tp->tf_ecx = regs->r_ecx; 3095 tp->tf_eax = regs->r_eax; 3096 tp->tf_eip = regs->r_eip; 3097 tp->tf_cs = regs->r_cs; 3098 tp->tf_eflags = regs->r_eflags; 3099 tp->tf_esp = regs->r_esp; 3100 tp->tf_ss = regs->r_ss; 3101 pcb->pcb_gs = regs->r_gs; 3102 return (0); 3103} 3104 3105#ifdef CPU_ENABLE_SSE 3106static void 3107fill_fpregs_xmm(sv_xmm, sv_87) 3108 struct
savexmm *sv_xmm; 3109 struct save87 *sv_87; 3110{ 3111 register struct env87 *penv_87 = &sv_87->sv_env; 3112 register struct envxmm *penv_xmm = &sv_xmm->sv_env; 3113 int i; 3114 3115 bzero(sv_87, sizeof(*sv_87)); 3116 3117 /* FPU control/status */ 3118 penv_87->en_cw = penv_xmm->en_cw; 3119 penv_87->en_sw = penv_xmm->en_sw; 3120 penv_87->en_tw = penv_xmm->en_tw; 3121 penv_87->en_fip = penv_xmm->en_fip; 3122 penv_87->en_fcs = penv_xmm->en_fcs; 3123 penv_87->en_opcode = penv_xmm->en_opcode; 3124 penv_87->en_foo = penv_xmm->en_foo; 3125 penv_87->en_fos = penv_xmm->en_fos; 3126 3127 /* FPU registers */ 3128 for (i = 0; i < 8; ++i) 3129 sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; 3130} 3131 3132static void 3133set_fpregs_xmm(sv_87, sv_xmm) 3134 struct save87 *sv_87; 3135 struct savexmm *sv_xmm; 3136{ 3137 register struct env87 *penv_87 = &sv_87->sv_env; 3138 register struct envxmm *penv_xmm = &sv_xmm->sv_env; 3139 int i; 3140 3141 /* FPU control/status */ 3142 penv_xmm->en_cw = penv_87->en_cw; 3143 penv_xmm->en_sw = penv_87->en_sw; 3144 penv_xmm->en_tw = penv_87->en_tw; 3145 penv_xmm->en_fip = penv_87->en_fip; 3146 penv_xmm->en_fcs = penv_87->en_fcs; 3147 penv_xmm->en_opcode = penv_87->en_opcode; 3148 penv_xmm->en_foo = penv_87->en_foo; 3149 penv_xmm->en_fos = penv_87->en_fos; 3150 3151 /* FPU registers */ 3152 for (i = 0; i < 8; ++i) 3153 sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; 3154} 3155#endif /* CPU_ENABLE_SSE */ 3156 3157int 3158fill_fpregs(struct thread *td, struct fpreg *fpregs) 3159{ 3160#ifdef CPU_ENABLE_SSE 3161 if (cpu_fxsr) { 3162 fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, 3163 (struct save87 *)fpregs); 3164 return (0); 3165 } 3166#endif /* CPU_ENABLE_SSE */ 3167 bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); 3168 return (0); 3169} 3170 3171int 3172set_fpregs(struct thread *td, struct fpreg *fpregs) 3173{ 3174#ifdef CPU_ENABLE_SSE 3175 if (cpu_fxsr) { 3176 set_fpregs_xmm((struct save87 *)fpregs, 3177 &td->td_pcb->pcb_save.sv_xmm); 3178 
return (0); 3179 } 3180#endif /* CPU_ENABLE_SSE */ 3181 bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); 3182 return (0); 3183} 3184 3185/* 3186 * Get machine context. 3187 */ 3188int 3189get_mcontext(struct thread *td, mcontext_t *mcp, int flags) 3190{ 3191 struct trapframe *tp; 3192 struct segment_descriptor *sdp; 3193 3194 tp = td->td_frame; 3195 3196 PROC_LOCK(curthread->td_proc); 3197 mcp->mc_onstack = sigonstack(tp->tf_esp); 3198 PROC_UNLOCK(curthread->td_proc); 3199 mcp->mc_gs = td->td_pcb->pcb_gs; 3200 mcp->mc_fs = tp->tf_fs; 3201 mcp->mc_es = tp->tf_es; 3202 mcp->mc_ds = tp->tf_ds; 3203 mcp->mc_edi = tp->tf_edi; 3204 mcp->mc_esi = tp->tf_esi; 3205 mcp->mc_ebp = tp->tf_ebp; 3206 mcp->mc_isp = tp->tf_isp; 3207 mcp->mc_eflags = tp->tf_eflags; 3208 if (flags & GET_MC_CLEAR_RET) { 3209 mcp->mc_eax = 0; 3210 mcp->mc_edx = 0; 3211 mcp->mc_eflags &= ~PSL_C; 3212 } else { 3213 mcp->mc_eax = tp->tf_eax; 3214 mcp->mc_edx = tp->tf_edx; 3215 } 3216 mcp->mc_ebx = tp->tf_ebx; 3217 mcp->mc_ecx = tp->tf_ecx; 3218 mcp->mc_eip = tp->tf_eip; 3219 mcp->mc_cs = tp->tf_cs; 3220 mcp->mc_esp = tp->tf_esp; 3221 mcp->mc_ss = tp->tf_ss; 3222 mcp->mc_len = sizeof(*mcp); 3223 get_fpcontext(td, mcp); 3224 sdp = &td->td_pcb->pcb_gsd; 3225 mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; 3226 sdp = &td->td_pcb->pcb_fsd; 3227 mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; 3228 3229 return (0); 3230} 3231 3232/* 3233 * Set machine context. 3234 * 3235 * However, we don't set any but the user modifiable flags, and we won't 3236 * touch the cs selector. 
3237 */ 3238int 3239set_mcontext(struct thread *td, const mcontext_t *mcp) 3240{ 3241 struct trapframe *tp; 3242 int eflags, ret; 3243 3244 tp = td->td_frame; 3245 if (mcp->mc_len != sizeof(*mcp)) 3246 return (EINVAL); 3247 eflags = (mcp->mc_eflags & PSL_USERCHANGE) | 3248 (tp->tf_eflags & ~PSL_USERCHANGE); 3249 if ((ret = set_fpcontext(td, mcp)) == 0) { 3250 tp->tf_fs = mcp->mc_fs; 3251 tp->tf_es = mcp->mc_es; 3252 tp->tf_ds = mcp->mc_ds; 3253 tp->tf_edi = mcp->mc_edi; 3254 tp->tf_esi = mcp->mc_esi; 3255 tp->tf_ebp = mcp->mc_ebp; 3256 tp->tf_ebx = mcp->mc_ebx; 3257 tp->tf_edx = mcp->mc_edx; 3258 tp->tf_ecx = mcp->mc_ecx; 3259 tp->tf_eax = mcp->mc_eax; 3260 tp->tf_eip = mcp->mc_eip; 3261 tp->tf_eflags = eflags; 3262 tp->tf_esp = mcp->mc_esp; 3263 tp->tf_ss = mcp->mc_ss; 3264 td->td_pcb->pcb_gs = mcp->mc_gs; 3265 ret = 0; 3266 } 3267 return (ret); 3268} 3269 3270static void 3271get_fpcontext(struct thread *td, mcontext_t *mcp) 3272{ 3273#ifndef DEV_NPX 3274 mcp->mc_fpformat = _MC_FPFMT_NODEV; 3275 mcp->mc_ownedfp = _MC_FPOWNED_NONE; 3276#else 3277 union savefpu *addr; 3278 3279 /* 3280 * XXX mc_fpstate might be misaligned, since its declaration is not 3281 * unportabilized using __attribute__((aligned(16))) like the 3282 * declaration of struct savemm, and anyway, alignment doesn't work 3283 * for auto variables since we don't use gcc's pessimal stack 3284 * alignment. Work around this by abusing the spare fields after 3285 * mcp->mc_fpstate. 3286 * 3287 * XXX unpessimize most cases by only aligning when fxsave might be 3288 * called, although this requires knowing too much about 3289 * npxgetregs()'s internals. 
3290 */ 3291 addr = (union savefpu *)&mcp->mc_fpstate; 3292 if (td == PCPU_GET(fpcurthread) && 3293#ifdef CPU_ENABLE_SSE 3294 cpu_fxsr && 3295#endif 3296 ((uintptr_t)(void *)addr & 0xF)) { 3297 do 3298 addr = (void *)((char *)addr + 4); 3299 while ((uintptr_t)(void *)addr & 0xF); 3300 } 3301 mcp->mc_ownedfp = npxgetregs(td, addr); 3302 if (addr != (union savefpu *)&mcp->mc_fpstate) { 3303 bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); 3304 bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); 3305 } 3306 mcp->mc_fpformat = npxformat(); 3307#endif 3308} 3309 3310static int 3311set_fpcontext(struct thread *td, const mcontext_t *mcp) 3312{ 3313 union savefpu *addr; 3314 3315 if (mcp->mc_fpformat == _MC_FPFMT_NODEV) 3316 return (0); 3317 else if (mcp->mc_fpformat != _MC_FPFMT_387 && 3318 mcp->mc_fpformat != _MC_FPFMT_XMM) 3319 return (EINVAL); 3320 else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) 3321 /* We don't care what state is left in the FPU or PCB. */ 3322 fpstate_drop(td); 3323 else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || 3324 mcp->mc_ownedfp == _MC_FPOWNED_PCB) { 3325 /* XXX align as above. */ 3326 addr = (union savefpu *)&mcp->mc_fpstate; 3327 if (td == PCPU_GET(fpcurthread) && 3328#ifdef CPU_ENABLE_SSE 3329 cpu_fxsr && 3330#endif 3331 ((uintptr_t)(void *)addr & 0xF)) { 3332 do 3333 addr = (void *)((char *)addr + 4); 3334 while ((uintptr_t)(void *)addr & 0xF); 3335 bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); 3336 } 3337#ifdef DEV_NPX 3338#ifdef CPU_ENABLE_SSE 3339 if (cpu_fxsr) 3340 addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; 3341#endif 3342 /* 3343 * XXX we violate the dubious requirement that npxsetregs() 3344 * be called with interrupts disabled. 3345 */ 3346 npxsetregs(td, addr); 3347#endif 3348 /* 3349 * Don't bother putting things back where they were in the 3350 * misaligned case, since we know that the caller won't use 3351 * them again. 
3352 */ 3353 } else 3354 return (EINVAL); 3355 return (0); 3356} 3357 3358static void 3359fpstate_drop(struct thread *td) 3360{ 3361 register_t s; 3362 3363 s = intr_disable(); 3364#ifdef DEV_NPX 3365 if (PCPU_GET(fpcurthread) == td) 3366 npxdrop(); 3367#endif 3368 /* 3369 * XXX force a full drop of the npx. The above only drops it if we 3370 * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. 3371 * 3372 * XXX I don't much like npxgetregs()'s semantics of doing a full 3373 * drop. Dropping only to the pcb matches fnsave's behaviour. 3374 * We only need to drop to !PCB_INITDONE in sendsig(). But 3375 * sendsig() is the only caller of npxgetregs()... perhaps we just 3376 * have too many layers. 3377 */ 3378 curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; 3379 intr_restore(s); 3380} 3381 3382int 3383fill_dbregs(struct thread *td, struct dbreg *dbregs) 3384{ 3385 struct pcb *pcb; 3386 3387 if (td == NULL) { 3388 dbregs->dr[0] = rdr0(); 3389 dbregs->dr[1] = rdr1(); 3390 dbregs->dr[2] = rdr2(); 3391 dbregs->dr[3] = rdr3(); 3392 dbregs->dr[4] = rdr4(); 3393 dbregs->dr[5] = rdr5(); 3394 dbregs->dr[6] = rdr6(); 3395 dbregs->dr[7] = rdr7(); 3396 } else { 3397 pcb = td->td_pcb; 3398 dbregs->dr[0] = pcb->pcb_dr0; 3399 dbregs->dr[1] = pcb->pcb_dr1; 3400 dbregs->dr[2] = pcb->pcb_dr2; 3401 dbregs->dr[3] = pcb->pcb_dr3; 3402 dbregs->dr[4] = 0; 3403 dbregs->dr[5] = 0; 3404 dbregs->dr[6] = pcb->pcb_dr6; 3405 dbregs->dr[7] = pcb->pcb_dr7; 3406 } 3407 return (0); 3408} 3409 3410int 3411set_dbregs(struct thread *td, struct dbreg *dbregs) 3412{ 3413 struct pcb *pcb; 3414 int i; 3415 3416 if (td == NULL) { 3417 load_dr0(dbregs->dr[0]); 3418 load_dr1(dbregs->dr[1]); 3419 load_dr2(dbregs->dr[2]); 3420 load_dr3(dbregs->dr[3]); 3421 load_dr4(dbregs->dr[4]); 3422 load_dr5(dbregs->dr[5]); 3423 load_dr6(dbregs->dr[6]); 3424 load_dr7(dbregs->dr[7]); 3425 } else { 3426 /* 3427 * Don't let an illegal value for dr7 get set. Specifically, 3428 * check for undefined settings. 
Setting these bit patterns 3429 * result in undefined behaviour and can lead to an unexpected 3430 * TRCTRAP. 3431 */ 3432 for (i = 0; i < 4; i++) { 3433 if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) 3434 return (EINVAL); 3435 if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) 3436 return (EINVAL); 3437 } 3438 3439 pcb = td->td_pcb; 3440 3441 /* 3442 * Don't let a process set a breakpoint that is not within the 3443 * process's address space. If a process could do this, it 3444 * could halt the system by setting a breakpoint in the kernel 3445 * (if ddb was enabled). Thus, we need to check to make sure 3446 * that no breakpoints are being enabled for addresses outside 3447 * process's address space. 3448 * 3449 * XXX - what about when the watched area of the user's 3450 * address space is written into from within the kernel 3451 * ... wouldn't that still cause a breakpoint to be generated 3452 * from within kernel mode? 3453 */ 3454 3455 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { 3456 /* dr0 is enabled */ 3457 if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) 3458 return (EINVAL); 3459 } 3460 3461 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { 3462 /* dr1 is enabled */ 3463 if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) 3464 return (EINVAL); 3465 } 3466 3467 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { 3468 /* dr2 is enabled */ 3469 if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) 3470 return (EINVAL); 3471 } 3472 3473 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { 3474 /* dr3 is enabled */ 3475 if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) 3476 return (EINVAL); 3477 } 3478 3479 pcb->pcb_dr0 = dbregs->dr[0]; 3480 pcb->pcb_dr1 = dbregs->dr[1]; 3481 pcb->pcb_dr2 = dbregs->dr[2]; 3482 pcb->pcb_dr3 = dbregs->dr[3]; 3483 pcb->pcb_dr6 = dbregs->dr[6]; 3484 pcb->pcb_dr7 = dbregs->dr[7]; 3485 3486 pcb->pcb_flags |= PCB_DBREGS; 3487 } 3488 3489 return (0); 3490} 3491 3492/* 3493 * Return > 0 if a hardware breakpoint has been hit, and the 3494 * breakpoint was in user space. Return 0, otherwise. 
3495 */ 3496int 3497user_dbreg_trap(void) 3498{ 3499 u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ 3500 u_int32_t bp; /* breakpoint bits extracted from dr6 */ 3501 int nbp; /* number of breakpoints that triggered */ 3502 caddr_t addr[4]; /* breakpoint addresses */ 3503 int i; 3504 3505 dr7 = rdr7(); 3506 if ((dr7 & 0x000000ff) == 0) { 3507 /* 3508 * all GE and LE bits in the dr7 register are zero, 3509 * thus the trap couldn't have been caused by the 3510 * hardware debug registers 3511 */ 3512 return 0; 3513 } 3514 3515 nbp = 0; 3516 dr6 = rdr6(); 3517 bp = dr6 & 0x0000000f; 3518 3519 if (!bp) { 3520 /* 3521 * None of the breakpoint bits are set meaning this 3522 * trap was not caused by any of the debug registers 3523 */ 3524 return 0; 3525 } 3526 3527 /* 3528 * at least one of the breakpoints were hit, check to see 3529 * which ones and if any of them are user space addresses 3530 */ 3531 3532 if (bp & 0x01) { 3533 addr[nbp++] = (caddr_t)rdr0(); 3534 } 3535 if (bp & 0x02) { 3536 addr[nbp++] = (caddr_t)rdr1(); 3537 } 3538 if (bp & 0x04) { 3539 addr[nbp++] = (caddr_t)rdr2(); 3540 } 3541 if (bp & 0x08) { 3542 addr[nbp++] = (caddr_t)rdr3(); 3543 } 3544 3545 for (i = 0; i < nbp; i++) { 3546 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { 3547 /* 3548 * addr[i] is in user space 3549 */ 3550 return nbp; 3551 } 3552 } 3553 3554 /* 3555 * None of the breakpoints are in user space. 3556 */ 3557 return 0; 3558} 3559 3560#ifndef DEV_APIC 3561#include <machine/apicvar.h> 3562 3563/* 3564 * Provide stub functions so that the MADT APIC enumerator in the acpi 3565 * kernel module will link against a kernel without 'device apic'. 3566 * 3567 * XXX - This is a gross hack. 
3568 */ 3569void 3570apic_register_enumerator(struct apic_enumerator *enumerator) 3571{ 3572} 3573 3574void * 3575ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) 3576{ 3577 return (NULL); 3578} 3579 3580int 3581ioapic_disable_pin(void *cookie, u_int pin) 3582{ 3583 return (ENXIO); 3584} 3585 3586int 3587ioapic_get_vector(void *cookie, u_int pin) 3588{ 3589 return (-1); 3590} 3591 3592void 3593ioapic_register(void *cookie) 3594{ 3595} 3596 3597int 3598ioapic_remap_vector(void *cookie, u_int pin, int vector) 3599{ 3600 return (ENXIO); 3601} 3602 3603int 3604ioapic_set_extint(void *cookie, u_int pin) 3605{ 3606 return (ENXIO); 3607} 3608 3609int 3610ioapic_set_nmi(void *cookie, u_int pin) 3611{ 3612 return (ENXIO); 3613} 3614 3615int 3616ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) 3617{ 3618 return (ENXIO); 3619} 3620 3621int 3622ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) 3623{ 3624 return (ENXIO); 3625} 3626 3627void 3628lapic_create(u_int apic_id, int boot_cpu) 3629{ 3630} 3631 3632void 3633lapic_init(vm_paddr_t addr) 3634{ 3635} 3636 3637int 3638lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) 3639{ 3640 return (ENXIO); 3641} 3642 3643int 3644lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) 3645{ 3646 return (ENXIO); 3647} 3648 3649int 3650lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) 3651{ 3652 return (ENXIO); 3653} 3654#endif 3655 3656#ifdef KDB 3657 3658/* 3659 * Provide inb() and outb() as functions. They are normally only available as 3660 * inline functions, thus cannot be called from the debugger. 3661 */ 3662 3663/* silence compiler warnings */ 3664u_char inb_(u_short); 3665void outb_(u_short, u_char); 3666 3667u_char 3668inb_(u_short port) 3669{ 3670 return inb(port); 3671} 3672 3673void 3674outb_(u_short port, u_char data) 3675{ 3676 outb(port, data); 3677} 3678 3679#endif /* KDB */ 3680