machdep.c revision 219468
133965Sjdp/*- 278828Sobrien * Copyright (c) 2003 Peter Wemm. 3218822Sdim * Copyright (c) 1992 Terrence R. Lambert. 459343Sobrien * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 533965Sjdp * All rights reserved. 633965Sjdp * 733965Sjdp * This code is derived from software contributed to Berkeley by 833965Sjdp * William Jolitz. 933965Sjdp * 10130561Sobrien * Redistribution and use in source and binary forms, with or without 1133965Sjdp * modification, are permitted provided that the following conditions 12130561Sobrien * are met: 13130561Sobrien * 1. Redistributions of source code must retain the above copyright 14130561Sobrien * notice, this list of conditions and the following disclaimer. 15130561Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1633965Sjdp * notice, this list of conditions and the following disclaimer in the 17130561Sobrien * documentation and/or other materials provided with the distribution. 18130561Sobrien * 3. All advertising materials mentioning features or use of this software 19130561Sobrien * must display the following acknowledgement: 20130561Sobrien * This product includes software developed by the University of 2133965Sjdp * California, Berkeley and its contributors. 22130561Sobrien * 4. Neither the name of the University nor the names of its contributors 23130561Sobrien * may be used to endorse or promote products derived from this software 24218822Sdim * without specific prior written permission. 2533965Sjdp * 2633965Sjdp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2733965Sjdp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2833965Sjdp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2933965Sjdp * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30130561Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3133965Sjdp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3233965Sjdp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3359343Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3433965Sjdp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35130561Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3633965Sjdp * SUCH DAMAGE. 3759343Sobrien * 38130561Sobrien * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 3933965Sjdp */ 4059343Sobrien 41130561Sobrien#include <sys/cdefs.h> 4233965Sjdp__FBSDID("$FreeBSD: head/sys/amd64/amd64/machdep.c 219468 2011-03-10 22:56:00Z mdf $"); 4359343Sobrien 44130561Sobrien#include "opt_atalk.h" 4533965Sjdp#include "opt_atpic.h" 4659343Sobrien#include "opt_compat.h" 47130561Sobrien#include "opt_cpu.h" 4833965Sjdp#include "opt_ddb.h" 4959343Sobrien#include "opt_inet.h" 50130561Sobrien#include "opt_ipx.h" 51130561Sobrien#include "opt_isa.h" 52130561Sobrien#include "opt_kstack_pages.h" 5333965Sjdp#include "opt_maxmem.h" 5459343Sobrien#include "opt_perfmon.h" 55130561Sobrien#include "opt_sched.h" 56130561Sobrien#include "opt_kdtrace.h" 57130561Sobrien 5833965Sjdp#include <sys/param.h> 5959343Sobrien#include <sys/proc.h> 6033965Sjdp#include <sys/systm.h> 61130561Sobrien#include <sys/bio.h> 62130561Sobrien#include <sys/buf.h> 63130561Sobrien#include <sys/bus.h> 64130561Sobrien#include <sys/callout.h> 65130561Sobrien#include <sys/cons.h> 66130561Sobrien#include <sys/cpu.h> 67130561Sobrien#include <sys/eventhandler.h> 68130561Sobrien#include <sys/exec.h> 69130561Sobrien#include <sys/imgact.h> 70130561Sobrien#include <sys/kdb.h> 71130561Sobrien#include <sys/kernel.h> 72130561Sobrien#include <sys/ktr.h> 73130561Sobrien#include <sys/linker.h> 74130561Sobrien#include 
<sys/lock.h> 75130561Sobrien#include <sys/malloc.h> 76130561Sobrien#include <sys/msgbuf.h> 77130561Sobrien#include <sys/mutex.h> 78130561Sobrien#include <sys/pcpu.h> 7933965Sjdp#include <sys/ptrace.h> 80130561Sobrien#include <sys/reboot.h> 8133965Sjdp#include <sys/sched.h> 8259343Sobrien#include <sys/signalvar.h> 8359343Sobrien#include <sys/syscallsubr.h> 8459343Sobrien#include <sys/sysctl.h> 8577298Sobrien#include <sys/sysent.h> 8633965Sjdp#include <sys/sysproto.h> 8759343Sobrien#include <sys/ucontext.h> 8859343Sobrien#include <sys/vmmeter.h> 8959343Sobrien 9059343Sobrien#include <vm/vm.h> 9159343Sobrien#include <vm/vm_extern.h> 92130561Sobrien#include <vm/vm_kern.h> 93130561Sobrien#include <vm/vm_page.h> 9459343Sobrien#include <vm/vm_map.h> 9559343Sobrien#include <vm/vm_object.h> 9633965Sjdp#include <vm/vm_pager.h> 9777298Sobrien#include <vm/vm_param.h> 98104834Sobrien 9977298Sobrien#ifdef DDB 10033965Sjdp#ifndef KDB 101130561Sobrien#error KDB must be enabled in order for DDB to work! 
102130561Sobrien#endif 103130561Sobrien#include <ddb/ddb.h> 104130561Sobrien#include <ddb/db_sym.h> 105130561Sobrien#endif 106130561Sobrien 107130561Sobrien#include <net/netisr.h> 108130561Sobrien 109130561Sobrien#include <machine/clock.h> 110130561Sobrien#include <machine/cpu.h> 111130561Sobrien#include <machine/cputypes.h> 11233965Sjdp#include <machine/intr_machdep.h> 113130561Sobrien#include <x86/mca.h> 11433965Sjdp#include <machine/md_var.h> 115130561Sobrien#include <machine/metadata.h> 116130561Sobrien#include <machine/pc/bios.h> 117130561Sobrien#include <machine/pcb.h> 118130561Sobrien#include <machine/proc.h> 119130561Sobrien#include <machine/reg.h> 120130561Sobrien#include <machine/sigframe.h> 121218822Sdim#include <machine/specialreg.h> 12233965Sjdp#ifdef PERFMON 123130561Sobrien#include <machine/perfmon.h> 124130561Sobrien#endif 125130561Sobrien#include <machine/tss.h> 126130561Sobrien#ifdef SMP 127130561Sobrien#include <machine/smp.h> 128130561Sobrien#endif 129130561Sobrien 130130561Sobrien#ifdef DEV_ATPIC 131130561Sobrien#include <x86/isa/icu.h> 132130561Sobrien#else 133130561Sobrien#include <machine/apicvar.h> 134130561Sobrien#endif 135130561Sobrien 136130561Sobrien#include <isa/isareg.h> 137130561Sobrien#include <isa/rtc.h> 138130561Sobrien 139130561Sobrien/* Sanity check for __curthread() */ 140130561SobrienCTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 141130561Sobrien 142130561Sobrienextern u_int64_t hammer_time(u_int64_t, u_int64_t); 143130561Sobrien 144130561Sobrienextern void printcpuinfo(void); /* XXX header file */ 145130561Sobrienextern void identify_cpu(void); 146130561Sobrienextern void panicifcpuunsupported(void); 147130561Sobrien 148130561Sobrien#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 149130561Sobrien#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 150130561Sobrien 15133965Sjdpstatic void cpu_startup(void *); 152130561Sobrienstatic void get_fpcontext(struct thread *td, mcontext_t *mcp); 153130561Sobrienstatic 
int set_fpcontext(struct thread *td, const mcontext_t *mcp); 154130561SobrienSYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 155130561Sobrien 156130561Sobrien#ifdef DDB 157130561Sobrienextern vm_offset_t ksym_start, ksym_end; 158130561Sobrien#endif 159130561Sobrien 160130561Sobrienstruct msgbuf *msgbufp; 161130561Sobrien 162130561Sobrien/* Intel ICH registers */ 163130561Sobrien#define ICH_PMBASE 0x400 164130561Sobrien#define ICH_SMI_EN ICH_PMBASE + 0x30 165130561Sobrien 166130561Sobrienint _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; 167130561Sobrien 168130561Sobrienint cold = 1; 169130561Sobrien 170130561Sobrienlong Maxmem = 0; 171130561Sobrienlong realmem = 0; 172130561Sobrien 173130561Sobrien/* 174130561Sobrien * The number of PHYSMAP entries must be one less than the number of 175130561Sobrien * PHYSSEG entries because the PHYSMAP entry that spans the largest 176130561Sobrien * physical address that is accessible by ISA DMA is split into two 177130561Sobrien * PHYSSEG entries. 
 */
#define PHYSMAP_SIZE	(2 * (VM_PHYSSEG_MAX - 1))

/* Usable / dumpable physical memory, as start,end pairs; 0,0 terminates. */
vm_paddr_t phys_avail[PHYSMAP_SIZE + 2];
vm_paddr_t dump_avail[PHYSMAP_SIZE + 2];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)

struct kva_md_info kmi;

static struct trapframe proc0_tf;
struct region_descriptor r_gdt, r_idt;

struct pcpu __pcpu[MAXCPU];

struct mtx icu_lock;

struct mtx dt_lock;	/* lock for GDT and LDT */

/*
 * Late (SI_SUB_CPU) machine-dependent startup: report CPU and memory,
 * initialize the kernel VA submaps and the buffer cache, and set the
 * final CR0 bits.  Runs once from SYSINIT.
 */
static void
cpu_startup(dummy)
	void *dummy;
{
	uintmax_t memsize;
	char *sysenv;

	/*
	 * On MacBooks, we need to disallow the legacy USB circuit to
	 * generate an SMI# because this can cause several problems,
	 * namely: incorrect CPU frequency detection and failure to
	 * start the APs.
	 * We do this by disabling a bit in the SMI_EN (SMI Control and
	 * Enable register) of the Intel ICH LPC Interface Bridge.
	 */
	sysenv = getenv("smbios.system.product");
	if (sysenv != NULL) {
		if (strncmp(sysenv, "MacBook1,1", 10) == 0 ||
		    strncmp(sysenv, "MacBook3,1", 10) == 0 ||
		    strncmp(sysenv, "MacBookPro1,1", 13) == 0 ||
		    strncmp(sysenv, "MacBookPro1,2", 13) == 0 ||
		    strncmp(sysenv, "MacBookPro3,1", 13) == 0 ||
		    strncmp(sysenv, "Macmini1,1", 10) == 0) {
			if (bootverbose)
				printf("Disabling LEGACY_USB_EN bit on "
				    "Intel ICH.\n");
			/* Bit 3 is LEGACY_USB_EN in SMI_EN. */
			outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8);
		}
		freeenv(sysenv);
	}

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	startrtclock();
	printcpuinfo();
	panicifcpuunsupported();
#ifdef PERFMON
	perfmon_init();
#endif
	realmem = Maxmem;

	/*
	 * Display physical memory if SMBIOS reports reasonable amount.
	 */
	memsize = 0;
	sysenv = getenv("smbios.memory.enabled");
	if (sysenv != NULL) {
		/* SMBIOS reports KB; shift up to bytes. */
		memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10;
		freeenv(sysenv);
	}
	/* Fall back to the probed Maxmem when SMBIOS looks implausibly low. */
	if (memsize < ptoa((uintmax_t)cnt.v_free_count))
		memsize = ptoa((uintmax_t)Maxmem);
	printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20);

	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (bootverbose) {
		int indx;

		printf("Physical memory chunk(s):\n");
		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
			vm_paddr_t size;

			size = phys_avail[indx + 1] - phys_avail[indx];
			printf(
			    "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[indx],
			    (uintmax_t)phys_avail[indx + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	vm_ksubmap_init(&kmi);

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)cnt.v_free_count),
	    ptoa((uintmax_t)cnt.v_free_count) / 1048576);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();

	cpu_setregs();
}

/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by call
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 */
void
sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct sigframe sf, *sfp;
	struct pcb *pcb;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	char *sp;
	struct trapframe *regs;
	int sig;
	int oonstack;

	td = curthread;
	pcb = td->td_pcb;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs));
	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
	get_fpcontext(td, &sf.sf_uc.uc_mcontext);
	fpstate_drop(td);
	sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
	sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
	bzero(sf.sf_uc.uc_mcontext.mc_spare,
	    sizeof(sf.sf_uc.uc_mcontext.mc_spare));
	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sp = td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct sigframe);
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		/* 128 bytes below %rsp skips the amd64 ABI red zone. */
		sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128;
	/* Align to 16 bytes. */
	sfp = (struct sigframe *)((unsigned long)sp & ~0xFul);

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	regs->tf_rdi = sig;			/* arg 1 in %rdi */
	regs->tf_rdx = (register_t)&sfp->sf_uc;	/* arg 3 in %rdx */
	bzero(&sf.sf_si, sizeof(sf.sf_si));
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		regs->tf_rsi = (register_t)&sfp->sf_si;	/* arg 2 in %rsi */
		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;

		/* Fill in POSIX parts */
		sf.sf_si = ksi->ksi_info;
		sf.sf_si.si_signo = sig; /* maybe a translated signal */
		regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
	} else {
		/* Old FreeBSD-style arguments. */
		regs->tf_rsi = ksi->ksi_code;	/* arg 2 in %rsi */
		regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */
		sf.sf_ahu.sf_handler = catcher;
	}
	/* Drop locks around the copyout(); it may fault and sleep. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/* Redirect the trapframe so the return-to-user lands in sigcode. */
	regs->tf_rsp = (long)sfp;
	regs->tf_rip = p->p_sysent->sv_sigcode_base;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(pcb, PCB_FULL_IRET);
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 *
 * MPSAFE
 */
int
sigreturn(td, uap)
	struct thread *td;
	struct sigreturn_args /* {
		const struct __ucontext *sigcntxp;
	} */ *uap;
{
	ucontext_t uc;
	struct pcb *pcb;
	struct proc *p;
	struct trapframe *regs;
	ucontext_t *ucp;
	long rflags;
	int cs, error, ret;
	ksiginfo_t ksi;

	pcb = td->td_pcb;
	p = td->td_proc;

	/* Pull the user-supplied context into the kernel for validation. */
	error = copyin(uap->sigcntxp, &uc, sizeof(uc));
	if (error != 0) {
		uprintf("pid %d (%s): sigreturn copyin failed\n",
		    p->p_pid, td->td_name);
		return (error);
	}
	ucp = &uc;
	if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
		uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid,
		    td->td_name, ucp->uc_mcontext.mc_flags);
		return (EINVAL);
	}
	regs = td->td_frame;
	rflags = ucp->uc_mcontext.mc_rflags;
	/*
	 * Don't allow users to change privileged or reserved flags.
	 */
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.
	 * The cpu sets PSL_RF in tf_rflags for faults.  Debuggers
	 * should sometimes set it there too.  tf_rflags is kept in
	 * the signal context during signal handling and there is no
	 * other place to remember it, so the PSL_RF bit may be
	 * corrupted by the signal handler without us knowing.
	 * Corruption of the PSL_RF bit at worst causes one more or
	 * one less debugger trap, so allowing it is fairly harmless.
	 */
	if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
		uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid,
		    td->td_name, rflags);
		return (EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
	cs = ucp->uc_mcontext.mc_cs;
	if (!CS_SECURE(cs)) {
		uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid,
		    td->td_name, cs);
		/* Deliver SIGBUS rather than returning to a bogus %cs. */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return (EINVAL);
	}

	ret = set_fpcontext(td, &ucp->uc_mcontext);
	if (ret != 0) {
		uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n",
		    p->p_pid, td->td_name, ret);
		return (ret);
	}
	/* All checks passed: install the saved register state. */
	bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
	pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
	pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;

#if defined(COMPAT_43)
	if (ucp->uc_mcontext.mc_onstack & 1)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	else
		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);

	set_pcb_flags(pcb, PCB_FULL_IRET);
	/* EJUSTRETURN: the restored trapframe already holds the return state. */
	return (EJUSTRETURN);
}

#ifdef COMPAT_FREEBSD4
/* The FreeBSD 4.x ucontext layout is compatible; reuse sigreturn(). */
int
freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
{

	return sigreturn(td, (struct sigreturn_args *)uap);
}
#endif


/*
 * Machine dependent boot() routine
 *
 * I haven't seen anything to put here yet
 * Possibly some stuff might be grafted back here from boot()
 */
void
cpu_boot(int howto)
{
}

/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{
	/* Not applicable */
}

/* Get current clock frequency for the given cpu id. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
	register_t reg;
	uint64_t tsc1, tsc2;

	if (pcpu_find(cpu_id) == NULL || rate == NULL)
		return (EINVAL);

	/* If TSC is P-state invariant, DELAY(9) based logic fails. */
	if (tsc_is_invariant)
		return (EOPNOTSUPP);

	/* If we're booting, trust the rate calibrated moments ago. */
	if (cold) {
		*rate = tsc_freq;
		return (0);
	}

#ifdef SMP
	/* Schedule ourselves on the indicated cpu. */
	thread_lock(curthread);
	sched_bind(curthread, cpu_id);
	thread_unlock(curthread);
#endif

	/* Calibrate by measuring a short delay. */
	reg = intr_disable();
	tsc1 = rdtsc();
	DELAY(1000);
	tsc2 = rdtsc();
	intr_restore(reg);

#ifdef SMP
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
#endif

	/* TSC ticks over 1000us, scaled to ticks per second. */
	*rate = (tsc2 - tsc1) * 1000;
	return (0);
}

/*
 * Shutdown the CPU as much as possible
 */
void
cpu_halt(void)
{
	for (;;)
		__asm__ ("hlt");
}

void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
static int	cpu_ident_amdc1e = 0;	/* AMD C1E supported. */
static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle.
 */
TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
    0, "Use MONITOR/MWAIT for short idle");

/*
 * Per-CPU idle state, stored in the pcpu monitor buffer so that
 * cpu_idle_wakeup() can see (and for MWAIT, change) it remotely.
 */
#define	STATE_RUNNING	0x0
#define	STATE_MWAIT	0x1
#define	STATE_SLEEPING	0x2

/* Idle via the ACPI hook when present, otherwise fall back to hlt. */
static void
cpu_idle_acpi(int busy)
{
	int *state;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_SLEEPING;
	disable_intr();
	if (sched_runnable())
		enable_intr();
	else if (cpu_idle_hook)
		cpu_idle_hook();
	else
		__asm __volatile("sti; hlt");
	*state = STATE_RUNNING;
}

/* Idle by halting until the next interrupt. */
static void
cpu_idle_hlt(int busy)
{
	int *state;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_SLEEPING;
	/*
	 * We must absolutely guarantee that hlt is the next instruction
	 * after sti or we introduce a timing window.
	 */
	disable_intr();
	if (sched_runnable())
		enable_intr();
	else
		__asm __volatile("sti; hlt");
	*state = STATE_RUNNING;
}

/*
 * MWAIT cpu power states.  Lower 4 bits are sub-states.
 */
#define	MWAIT_C0	0xf0
#define	MWAIT_C1	0x00
#define	MWAIT_C2	0x10
#define	MWAIT_C3	0x20
#define	MWAIT_C4	0x30

/*
 * Idle with MONITOR/MWAIT: arm the monitor on our state word, then
 * MWAIT in C1.  A remote CPU can wake us by writing STATE_RUNNING to
 * the monitored word (see cpu_idle_wakeup()) instead of sending an IPI.
 */
static void
cpu_idle_mwait(int busy)
{
	int *state;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_MWAIT;
	if (!sched_runnable()) {
		cpu_monitor(state, 0, 0);
		/* Re-check: a wakeup may have landed before the mwait. */
		if (*state == STATE_MWAIT)
			cpu_mwait(0, MWAIT_C1);
	}
	*state = STATE_RUNNING;
}

/* Idle by busy-waiting for a bounded number of spins. */
static void
cpu_idle_spin(int busy)
{
	int *state;
	int i;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_RUNNING;
	for (i = 0; i < 1000; i++) {
		if (sched_runnable())
			return;
		cpu_spinwait();
	}
}

/*
 * C1E renders the local APIC timer dead, so we disable it by
 * reading the Interrupt Pending Message register and clearing
 * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
 *
 * Reference:
 *   "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors"
 *   #32559 revision 3.00+
 */
#define	MSR_AMDK8_IPM		0xc0010055
#define	AMDK8_SMIONCMPHALT	(1ULL << 27)
#define	AMDK8_C1EONCMPHALT	(1ULL << 28)
#define	AMDK8_CMPHALT		(AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)

/* Set cpu_ident_amdc1e if this CPU may have the AMD C1E erratum. */
static void
cpu_probe_amdc1e(void)
{

	/*
	 * Detect the presence of C1E capability mostly on latest
	 * dual-cores (or future) k8 family.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    (cpu_id & 0x00000f00) == 0x00000f00 &&
	    (cpu_id & 0x0fff0000) >= 0x00040000) {
		cpu_ident_amdc1e = 1;
	}
}

/* Currently selected idle method; switched via the machdep.idle sysctl. */
void (*cpu_idle_fn)(int) = cpu_idle_acpi;

/*
 * Main idle entry point.  'busy' is a hint that the CPU expects to be
 * needed again soon, in which case the cheaper-to-wake methods are
 * preferred and the idle clock is not engaged.
 */
void
cpu_idle(int busy)
{
	uint64_t msr;

	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
	    busy, curcpu);
#ifdef SMP
	if (mp_grab_cpu_hlt())
		return;
#endif
	/* If we are busy - try to use fast methods. */
	if (busy) {
		if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
			cpu_idle_mwait(busy);
			goto out;
		}
	}

	/* If we have time - switch timers into idle mode. */
	if (!busy) {
		critical_enter();
		cpu_idleclock();
	}

	/* Apply AMD APIC timer C1E workaround. */
	if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
		msr = rdmsr(MSR_AMDK8_IPM);
		if (msr & AMDK8_CMPHALT)
			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
	}

	/* Call main idle method. */
	cpu_idle_fn(busy);

	/* Switch timers back into active mode. */
	if (!busy) {
		cpu_activeclock();
		critical_exit();
	}
out:
	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
	    busy, curcpu);
}

/*
 * Try to wake an idle remote CPU without an IPI.  Returns 0 if the CPU
 * is sleeping in hlt/acpi (caller must IPI it), 1 otherwise (it was
 * running, or its MWAIT monitor word was poked to wake it).
 */
int
cpu_idle_wakeup(int cpu)
{
	struct pcpu *pcpu;
	int *state;

	pcpu = pcpu_find(cpu);
	state = (int *)pcpu->pc_monitorbuf;
	/*
	 * This doesn't need to be atomic since missing the race will
	 * simply result in unnecessary IPIs.
	 */
	if (*state == STATE_SLEEPING)
		return (0);
	if (*state == STATE_MWAIT)
		*state = STATE_RUNNING;
	return (1);
}

/*
 * Ordered by speed/power consumption.
773130561Sobrien */ 774130561Sobrienstruct { 775130561Sobrien void *id_fn; 776130561Sobrien char *id_name; 777130561Sobrien} idle_tbl[] = { 778130561Sobrien { cpu_idle_spin, "spin" }, 779130561Sobrien { cpu_idle_mwait, "mwait" }, 780130561Sobrien { cpu_idle_hlt, "hlt" }, 781130561Sobrien { cpu_idle_acpi, "acpi" }, 782130561Sobrien { NULL, NULL } 783130561Sobrien}; 784130561Sobrien 785130561Sobrienstatic int 786130561Sobrienidle_sysctl_available(SYSCTL_HANDLER_ARGS) 787130561Sobrien{ 788130561Sobrien char *avail, *p; 789130561Sobrien int error; 790130561Sobrien int i; 791130561Sobrien 792130561Sobrien avail = malloc(256, M_TEMP, M_WAITOK); 793130561Sobrien p = avail; 794130561Sobrien for (i = 0; idle_tbl[i].id_name != NULL; i++) { 795130561Sobrien if (strstr(idle_tbl[i].id_name, "mwait") && 796130561Sobrien (cpu_feature2 & CPUID2_MON) == 0) 797130561Sobrien continue; 798130561Sobrien if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && 799130561Sobrien cpu_idle_hook == NULL) 800130561Sobrien continue; 80133965Sjdp p += sprintf(p, "%s%s", p != avail ? 
", " : "", 802 idle_tbl[i].id_name); 803 } 804 error = sysctl_handle_string(oidp, avail, 0, req); 805 free(avail, M_TEMP); 806 return (error); 807} 808 809SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 810 0, 0, idle_sysctl_available, "A", "list of available idle functions"); 811 812static int 813idle_sysctl(SYSCTL_HANDLER_ARGS) 814{ 815 char buf[16]; 816 int error; 817 char *p; 818 int i; 819 820 p = "unknown"; 821 for (i = 0; idle_tbl[i].id_name != NULL; i++) { 822 if (idle_tbl[i].id_fn == cpu_idle_fn) { 823 p = idle_tbl[i].id_name; 824 break; 825 } 826 } 827 strncpy(buf, p, sizeof(buf)); 828 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 829 if (error != 0 || req->newptr == NULL) 830 return (error); 831 for (i = 0; idle_tbl[i].id_name != NULL; i++) { 832 if (strstr(idle_tbl[i].id_name, "mwait") && 833 (cpu_feature2 & CPUID2_MON) == 0) 834 continue; 835 if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && 836 cpu_idle_hook == NULL) 837 continue; 838 if (strcmp(idle_tbl[i].id_name, buf)) 839 continue; 840 cpu_idle_fn = idle_tbl[i].id_fn; 841 return (0); 842 } 843 return (EINVAL); 844} 845 846SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, 847 idle_sysctl, "A", "currently selected idle function"); 848 849/* 850 * Reset registers to default values on exec. 
 */
void
exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
	struct trapframe *regs = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	/*
	 * Discard any private LDT.  NOTE(review): only the else path
	 * unlocks dt_lock here, so user_ldt_free() is presumably expected
	 * to drop it — confirm against its definition.
	 */
	mtx_lock(&dt_lock);
	if (td->td_proc->p_md.md_ldt != NULL)
		user_ldt_free(td);
	else
		mtx_unlock(&dt_lock);

	/* Reset segment bases and FPU control word to the defaults. */
	pcb->pcb_fsbase = 0;
	pcb->pcb_gsbase = 0;
	clear_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT);
	pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
	set_pcb_flags(pcb, PCB_FULL_IRET);

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_rip = imgp->entry_addr;
	/* 16-byte align the stack while keeping an 8-byte slot below it. */
	regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
	regs->tf_rdi = stack;		/* argv */
	/* Only the trace flag survives the exec. */
	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
	regs->tf_ss = _udatasel;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	td->td_retval[1] = 0;

	/*
	 * Reset the hardware debug registers if they were in use.
	 * They won't have any meaning for the newly exec'd process.
	 */
	if (pcb->pcb_flags & PCB_DBREGS) {
		pcb->pcb_dr0 = 0;
		pcb->pcb_dr1 = 0;
		pcb->pcb_dr2 = 0;
		pcb->pcb_dr3 = 0;
		pcb->pcb_dr6 = 0;
		pcb->pcb_dr7 = 0;
		if (pcb == PCPU_GET(curpcb)) {
			/*
			 * Clear the debug registers on the running
			 * CPU, otherwise they will end up affecting
			 * the next process we switch to.
			 */
			reset_dbregs();
		}
		clear_pcb_flags(pcb, PCB_DBREGS);
	}

	/*
	 * Drop the FP state if we hold it, so that the process gets a
	 * clean FP state if it uses the FPU again.
	 */
	fpstate_drop(td);
}

void
cpu_setregs(void)
{
	register_t cr0;

	cr0 = rcr0();
	/*
	 * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the
	 * BSP. See the comments there about why we set them.
 */
	cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
	load_cr0(cr0);
}

/*
 * Initialize amd64 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */

/* Dedicated stack for the double fault handler (used via IST slot 1). */
static char dblfault_stack[PAGE_SIZE] __aligned(16);

/* Dedicated stack for the NMI handler (used via IST slot 2). */
static char nmi0_stack[PAGE_SIZE] __aligned(16);
CTASSERT(sizeof(struct nmi_pcpu) == 16);

/* One TSS per CPU; CPU 0's entry is wired into the GDT at boot. */
struct amd64tss common_tss[MAXCPU];

/*
 * Software prototypes -- in more palatable form.
 *
 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same
 * slots as corresponding segments for i386 kernel.
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_long = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GNULL2_SEL	1 Null Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_long = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GUFS32_SEL	2 32 bit %gs Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_long = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUGS32_SEL	3 32 bit %fs Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_long = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GCODE_SEL	4 Code Descriptor for kernel */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_long = 1,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GDATA_SEL	5 Data Descriptor for kernel */
{	.ssd_base
= 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_long = 1,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GUCODE32_SEL	6 32 bit Code Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_long = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUDATA_SEL	7 32/64 bit Data Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMRWA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_long = 0,
	.ssd_def32 = 1,
	.ssd_gran = 1		},
/* GUCODE_SEL	8 64 bit Code Descriptor for user */
{	.ssd_base = 0x0,
	.ssd_limit = 0xfffff,
	.ssd_type = SDT_MEMERA,
	.ssd_dpl = SEL_UPL,
	.ssd_p = 1,
	.ssd_long = 1,
	.ssd_def32 = 0,
	.ssd_gran = 1		},
/* GPROC0_SEL	9 Proc 0 Tss Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE - 1,
	.ssd_type = SDT_SYSTSS,
	.ssd_dpl = SEL_KPL,
	.ssd_p = 1,
	.ssd_long = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* Actually, the TSS is a system descriptor which is double size */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_long = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GUSERLDT_SEL	11 LDT Descriptor */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_long = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
/* GUSERLDT_SEL	12 LDT Descriptor, double size */
{	.ssd_base = 0x0,
	.ssd_limit = 0x0,
	.ssd_type = 0,
	.ssd_dpl = 0,
	.ssd_p = 0,
	.ssd_long = 0,
	.ssd_def32 = 0,
	.ssd_gran = 0		},
};

/*
 * Install one gate in the IDT: point entry 'idx' at handler 'func' with
 * descriptor type 'typ', privilege level 'dpl' and interrupt stack table
 * index 'ist'.
 */
void
setidt(idx, func, typ, dpl, ist)
	int idx;
	inthand_t *func;
	int typ;
	int dpl;
	int ist;
{
	struct gate_descriptor *ip;

	ip = idt + idx;
	/* The handler offset is split across two descriptor fields. */
	ip->gd_looffset = (uintptr_t)func;
	ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
	ip->gd_ist = ist;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	ip->gd_hioffset = ((uintptr_t)func)>>16 ;
}

/* Exception/trap entry points defined in the amd64 assembly stubs. */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(xmm), IDTVEC(dblfault),
#ifdef KDTRACE_HOOKS
	IDTVEC(dtrace_ret),
#endif
	IDTVEC(fast_syscall), IDTVEC(fast_syscall32);

#ifdef DDB
/*
 * Display the index and function name of any IDT entries that don't use
 * the default 'rsvd' entry point.
 */
DB_SHOW_COMMAND(idt, db_show_idt)
{
	struct gate_descriptor *ip;
	int idx;
	uintptr_t func;

	ip = idt;
	for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
		/* Reassemble the handler address from its split fields. */
		func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
		if (func != (uintptr_t)&IDTVEC(rsvd)) {
			db_printf("%3d\t", idx);
			db_printsym(func, DB_STGY_PROC);
			db_printf("\n");
		}
		ip++;
	}
}
#endif

/*
 * Unpack a hardware user-segment descriptor into its software form.
 */
void
sdtossd(sd, ssd)
	struct user_segment_descriptor *sd;
	struct soft_segment_descriptor *ssd;
{

	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type  = sd->sd_type;
	ssd->ssd_dpl   = sd->sd_dpl;
	ssd->ssd_p     = sd->sd_p;
	ssd->ssd_long  = sd->sd_long;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran  = sd->sd_gran;
}

/*
 * Pack a software segment descriptor into hardware user-segment form.
 */
void
ssdtosd(ssd, sd)
	struct soft_segment_descriptor *ssd;
	struct user_segment_descriptor *sd;
{

	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type  = ssd->ssd_type;
	sd->sd_dpl   = ssd->ssd_dpl;
	sd->sd_p     = ssd->ssd_p;
	sd->sd_long  = ssd->ssd_long;
	sd->sd_def32 = ssd->ssd_def32;
	sd->sd_gran  = ssd->ssd_gran;
}

/*
 * Pack a software segment descriptor into hardware system-segment form
 * (TSS/LDT).  Unlike ssdtosd(), the long/def32 bits are not copied and
 * the base field is wider.
 */
void
ssdtosyssd(ssd, sd)
	struct soft_segment_descriptor *ssd;
	struct system_segment_descriptor *sd;
{

	sd->sd_lobase = (ssd->ssd_base) & 0xffffff;
	sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful;
	sd->sd_lolimit = (ssd->ssd_limit) & 0xffff;
	sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf;
	sd->sd_type  = ssd->ssd_type;
	sd->sd_dpl   = ssd->ssd_dpl;
	sd->sd_p     = ssd->ssd_p;
	sd->sd_gran  = ssd->ssd_gran;
}

#if !defined(DEV_ATPIC) && defined(DEV_ISA)
#include <isa/isavar.h>
#include <isa/isareg.h>
/*
 * Return a bitmap of the current interrupt requests.  This is 8259-specific
 * and is only suitable for use at probe time.
 * This is only here to pacify sio.  It is NOT FATAL if this doesn't work.
 * It shouldn't be here.  There should probably be an APIC centric
 * implementation in the apic driver code, if at all.
 */
intrmask_t
isa_irq_pending(void)
{
	u_char irr1;
	u_char irr2;

	irr1 = inb(IO_ICU1);
	irr2 = inb(IO_ICU2);
	return ((irr2 << 8) | irr1);
}
#endif

/* Size of conventional ("base") memory in KB, taken from the BIOS SMAP. */
u_int basemem;

/*
 * Merge one BIOS SMAP entry into the physmap[] array of base/end pairs.
 * Returns 0 to make the caller stop scanning (zero-length entry or table
 * full), 1 otherwise.  Only SMAP_TYPE_MEMORY entries are added; entries
 * overlapping an existing region are dropped with a warning.
 */
static int
add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp)
{
	int i, insert_idx, physmap_idx;

	physmap_idx = *physmap_idxp;

	if (boothowto & RB_VERBOSE)
		printf("SMAP type=%02x base=%016lx len=%016lx\n",
		    smap->type, smap->base, smap->length);

	if (smap->type != SMAP_TYPE_MEMORY)
		return (1);

	if (smap->length == 0)
		return (0);

	/*
	 * Find insertion point while checking for overlap.
Start off by
	 * assuming the new entry will be added to the end.
	 */
	insert_idx = physmap_idx + 2;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (smap->base < physmap[i + 1]) {
			if (smap->base + smap->length <= physmap[i]) {
				insert_idx = i;
				break;
			}
			if (boothowto & RB_VERBOSE)
				printf(
		    "Overlapping memory regions, ignoring second region\n");
			return (1);
		}
	}

	/* See if we can prepend to the next entry. */
	if (insert_idx <= physmap_idx &&
	    smap->base + smap->length == physmap[insert_idx]) {
		physmap[insert_idx] = smap->base;
		return (1);
	}

	/* See if we can append to the previous entry. */
	if (insert_idx > 0 && smap->base == physmap[insert_idx - 1]) {
		physmap[insert_idx - 1] += smap->length;
		return (1);
	}

	physmap_idx += 2;
	*physmap_idxp = physmap_idx;
	if (physmap_idx == PHYSMAP_SIZE) {
		printf(
		"Too many segments in the physical address map, giving up\n");
		return (0);
	}

	/*
	 * Move the last 'N' entries down to make room for the new
	 * entry if needed.
	 */
	for (i = physmap_idx; i > insert_idx; i -= 2) {
		physmap[i] = physmap[i - 2];
		physmap[i + 1] = physmap[i - 1];
	}

	/* Insert the new entry. */
	physmap[insert_idx] = smap->base;
	physmap[insert_idx + 1] = smap->base + smap->length;
	return (1);
}

/*
 * Populate the (physmap) array with base/bound pairs describing the
 * available physical memory in the system, then test this memory and
 * build the phys_avail array describing the actually-available memory.
 *
 * If we cannot accurately determine the physical memory map, then use
 * value from the 0xE801 call, and failing that, the RTC.
 *
 * Total memory size may be set by the kernel environment variable
 * hw.physmem or the compile-time define MAXMEM.
 *
 * XXX first should be vm_paddr_t.
 */
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
	int i, physmap_idx, pa_indx, da_indx;
	vm_paddr_t pa, physmap[PHYSMAP_SIZE];
	u_long physmem_tunable;
	pt_entry_t *pte;
	struct bios_smap *smapbase, *smap, *smapend;
	u_int32_t smapsize;
	quad_t dcons_addr, dcons_size;

	bzero(physmap, sizeof(physmap));
	basemem = 0;
	physmap_idx = 0;

	/*
	 * get memory map from INT 15:E820, kindly supplied by the loader.
	 *
	 * subr_module.c says:
	 * "Consumer may safely assume that size value precedes data."
	 * ie: an int32_t immediately precedes smap.
	 */
	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		panic("No BIOS smap info from loader!");

	smapsize = *((u_int32_t *)smapbase - 1);
	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	for (smap = smapbase; smap < smapend; smap++)
		if (!add_smap_entry(smap, physmap, &physmap_idx))
			break;

	/*
	 * Find the 'base memory' segment for SMP
	 */
	basemem = 0;
	for (i = 0; i <= physmap_idx; i += 2) {
		if (physmap[i] == 0x00000000) {
			basemem = physmap[i + 1] / 1024;
			break;
		}
	}
	if (basemem == 0)
		panic("BIOS smap did not include a basemem segment!");

#ifdef SMP
	/* make hole for AP bootstrap code */
	physmap[1] = mp_bootaddress(physmap[1] / 1024);
#endif

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".  We may adjust this
	 * based on ``hw.physmem'' and the results of the memory test.
	 */
	Maxmem = atop(physmap[physmap_idx + 1]);

#ifdef MAXMEM
	Maxmem = MAXMEM / 4;
#endif

	if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
		Maxmem = atop(physmem_tunable);

	/*
	 * Don't allow MAXMEM or hw.physmem to extend the amount of memory
	 * in the system.
	 */
	if (Maxmem > atop(physmap[physmap_idx + 1]))
		Maxmem = atop(physmap[physmap_idx + 1]);

	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
	    (boothowto & RB_VERBOSE))
		printf("Physical memory use set to %ldK\n", Maxmem * 4);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap(&first);

	/*
	 * Size up each available chunk of physical memory.
	 */
	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
	pa_indx = 0;
	da_indx = 1;
	phys_avail[pa_indx++] = physmap[0];
	phys_avail[pa_indx] = physmap[0];
	dump_avail[da_indx] = physmap[0];
	pte = CMAP1;

	/*
	 * Get dcons buffer address
	 */
	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
	    getenv_quad("dcons.size", &dcons_size) == 0)
		dcons_addr = 0;

	/*
	 * physmap is in bytes, so when converting to page boundaries,
	 * round up the start address and round down the end address.
	 */
	for (i = 0; i <= physmap_idx; i += 2) {
		vm_paddr_t end;

		end = ptoa((vm_paddr_t)Maxmem);
		if (physmap[i + 1] < end)
			end = trunc_page(physmap[i + 1]);
		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
			int tmp, page_bad, full;
			/* CADDR1 is the VA that CMAP1 maps; each candidate
			 * page is temporarily mapped there for testing. */
			int *ptr = (int *)CADDR1;

			full = FALSE;
			/*
			 * block out kernel memory as not available.
			 */
			if (pa >= 0x100000 && pa < first)
				goto do_dump_avail;

			/*
			 * block out dcons buffer
			 */
			if (dcons_addr > 0
			    && pa >= trunc_page(dcons_addr)
			    && pa < dcons_addr + dcons_size)
				goto do_dump_avail;

			page_bad = FALSE;

			/*
			 * map page into kernel: valid, read/write,non-cacheable
			 */
			*pte = pa | PG_V | PG_RW | PG_N;
			invltlb();

			tmp = *(int *)ptr;
			/*
			 * Test for alternating 1's and 0's
			 */
			*(volatile int *)ptr = 0xaaaaaaaa;
			if (*(volatile int *)ptr != 0xaaaaaaaa)
				page_bad = TRUE;
			/*
			 * Test for alternating 0's and 1's
			 */
			*(volatile int *)ptr = 0x55555555;
			if (*(volatile int *)ptr != 0x55555555)
				page_bad = TRUE;
			/*
			 * Test for all 1's
			 */
			*(volatile int *)ptr = 0xffffffff;
			if (*(volatile int *)ptr != 0xffffffff)
				page_bad = TRUE;
			/*
			 * Test for all 0's
			 */
			*(volatile int *)ptr = 0x0;
			if (*(volatile int *)ptr != 0x0)
				page_bad = TRUE;
			/*
			 * Restore original value.
			 */
			*(int *)ptr = tmp;

			/*
			 * Adjust array of valid/good pages.
			 */
			if (page_bad == TRUE)
				continue;
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer.  Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 * If we're also doing a speculative memory
			 * test and we at or past the end, bump up Maxmem
			 * so that we keep going.  The first bad page
			 * will terminate the loop.
			 */
			if (phys_avail[pa_indx] == pa) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf(
		"Too many holes in the physical address space, giving up\n");
					pa_indx--;
					full = TRUE;
					goto do_dump_avail;
				}
				phys_avail[pa_indx++] = pa;	/* start */
				phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
			}
			physmem++;
do_dump_avail:
			if (dump_avail[da_indx] == pa) {
				dump_avail[da_indx] += PAGE_SIZE;
			} else {
				da_indx++;
				if (da_indx == DUMP_AVAIL_ARRAY_END) {
					da_indx--;
					goto do_next;
				}
				dump_avail[da_indx++] = pa;	/* start */
				dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
			}
do_next:
			if (full)
				break;
		}
	}
	/* Tear down the temporary test mapping. */
	*pte = 0;
	invltlb();

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(msgbufsize) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(msgbufsize);

	/* Map the message buffer.
 */
	msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]);
}

/*
 * amd64 machine-dependent boot-time startup.  Sets up thread0's kernel
 * stack and pcb, the GDT/IDT/TSS and per-CPU data, the console, the
 * fast-syscall MSRs and the physical memory map.  Returns the address
 * of thread0's pcb, which the caller uses as the initial kernel stack
 * location (see the comment at the final return).
 */
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
	caddr_t kmdp;
	int gsel_tss, x;
	struct pcpu *pc;
	struct nmi_pcpu *np;
	u_int64_t msr;
	char *env;
	size_t kstack0_sz;

	/*
	 * Carve thread0's kernel stack out of the first free physical
	 * memory; its pcb lives at the top of that stack.
	 */
	thread0.td_kstack = physfree + KERNBASE;
	thread0.td_kstack_pages = KSTACK_PAGES;
	kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
	bzero((void *)thread0.td_kstack, kstack0_sz);
	physfree += kstack0_sz;
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;

	/*
	 * This may be done better later if it gets more high level
	 * components in it. If so just link td->td_proc here.
	 */
	proc_linkup0(&proc0, &thread0);

	preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE);
	preload_bootstrap_relocate(KERNBASE);
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");
	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
	kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE;
#ifdef DDB
	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
#endif

	/* Init basic tunables, hz etc */
	init_param1();

	/*
	 * make gdt memory segments
	 */
	for (x = 0; x < NGDT; x++) {
		/* TSS and LDT slots are double-size system descriptors;
		 * they are skipped here and filled in separately. */
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
			ssdtosd(&gdt_segs[x], &gdt[x]);
	}
	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (long) gdt;
	lgdt(&r_gdt);
	pc = &__pcpu[0];

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, 0);		/* User value while in the kernel */

	pcpu_init(pc, 0, sizeof(struct pcpu));
	dpcpu_init((void *)(physfree + KERNBASE), 0);
	physfree += DPCPU_SIZE;
	PCPU_SET(prvspace, pc);
	PCPU_SET(curthread, &thread0);
	PCPU_SET(curpcb, thread0.td_pcb);
	PCPU_SET(tssp, &common_tss[0]);
	PCPU_SET(commontssp, &common_tss[0]);
	PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
	PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
	PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
	PCPU_SET(gs32p, &gdt[GUGS32_SEL]);

	/*
	 * Initialize mutexes.
	 *
	 * icu_lock: in order to allow an interrupt to occur in a critical
	 *	      section, to set pcpu->ipending (etc...) properly, we
	 *	      must be able to get the icu lock, so it can't be
	 *	      under witness.
	 */
	mutex_init();
	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
	mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);

	/* exceptions: default every vector to 'rsvd', then fill in. */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
	/* NMI runs on its own stack, IST slot 2. */
	setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2);
	setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0);
	setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0);
	/* Double fault runs on its own stack, IST slot 1. */
	setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
	setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);
#ifdef KDTRACE_HOOKS
	setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0);
#endif

	r_idt.rd_limit = sizeof(idt0) - 1;
	r_idt.rd_base = (long) idt;
	lidt(&r_idt);

	/*
	 * Initialize the i8254 before the console so that console
	 * initialization can use DELAY().
	 */
	i8254_init();

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

#ifdef DEV_ISA
#ifdef DEV_ATPIC
	elcr_probe();
	atpic_startup();
#else
	/* Reset and mask the atpics and leave them shut down. */
	atpic_reset();

	/*
	 * Point the ICU spurious interrupt vectors at the APIC spurious
	 * interrupt handler.
	 */
	setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
	setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0);
#endif
#else
#error "have you forgotten the isa device?";
#endif

	kdb_init();

#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS,
		    "Boot flags requested debugger");
#endif

	identify_cpu();		/* Final stage of CPU initialization */
	initializecpu();	/* Initialize CPU registers */
	initializecpucache();

	/* make an initial tss so cpu can get interrupt stack on syscall! */
	common_tss[0].tss_rsp0 = thread0.td_kstack +
	    kstack0_sz - sizeof(struct pcb);
	/* Ensure the stack is aligned to 16 bytes */
	common_tss[0].tss_rsp0 &= ~0xFul;
	PCPU_SET(rsp0, common_tss[0].tss_rsp0);

	/* doublefault stack space, runs on ist1 */
	common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];

	/*
	 * NMI stack, runs on ist2.  The pcpu pointer is stored just
	 * above the start of the ist2 stack.
	 */
	np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1;
	np->np_pcpu = (register_t) pc;
	common_tss[0].tss_ist2 = (long) np;

	/* Set the IO permission bitmap (empty due to tss seg limit) */
	common_tss[0].tss_iobase = sizeof(struct amd64tss) +
	    IOPAGES * PAGE_SIZE;

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	/* Set up the fast syscall stuff */
	msr = rdmsr(MSR_EFER) | EFER_SCE;
	wrmsr(MSR_EFER, msr);
	wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall));
	wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32));
	msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48);
	wrmsr(MSR_STAR, msr);
	wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);

	getmemsize(kmdp, physfree);
	init_param2(physmem);

	/* now running on new page tables, configured,and u/iom is accessible */

	msgbufinit(msgbufp, msgbufsize);
	fpuinit();

	/* transfer to user mode */

	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
	_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);

	load_ds(_udatasel);
	load_es(_udatasel);
	load_fs(_ufssel);

	/* setup proc 0's pcb */
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_cr3 = KPML4phys;
	thread0.td_frame = &proc0_tf;

	env =
getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, MAXPATHLEN);

#ifdef XENHVM
	/*
	 * Probe for Xen via I/O port 0x10 and, when found, disable the
	 * emulated block and network devices (see the printf below).
	 * NOTE(review): the magic values 0x49d2 and 3 are presumably the
	 * Xen platform-device protocol — confirm against the Xen docs.
	 */
	if (inw(0x10) == 0x49d2) {
		if (bootverbose)
			printf("Xen detected: disabling emulated block and network devices\n");
		outw(0x10, 3);
	}
#endif

	cpu_probe_amdc1e();

	/* Location of kernel stack for locore */
	return ((u_int64_t)thread0.td_pcb);
}

/*
 * Machine-dependent per-CPU initialization hook; start with the ACPI id
 * set to an all-ones sentinel.
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
}

/*
 * Disable interrupts on the first (outermost) entry, recording the
 * previous interrupt state and counting nesting depth in
 * td_md.md_spinlock_count.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		flags = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_flags = flags;
	} else
		td->td_md.md_spinlock_count++;
	critical_enter();
}

/*
 * Undo one spinlock_enter(); the saved interrupt state is restored only
 * when the outermost section exits.
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t flags;

	td = curthread;
	critical_exit();
	flags = td->td_md.md_saved_flags;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0)
		intr_restore(flags);
}

/*
 * Construct a PCB from a trapframe. This is called from kdb_trap() where
 * we want to start a backtrace from the function that caused us to enter
 * the debugger. We have the context in the trapframe, but base the trace
 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
 * enough for a backtrace.
 */
void
makectx(struct trapframe *tf, struct pcb *pcb)
{

	/* Only the callee-saved registers plus rip/rsp are copied; that
	 * is enough for a backtrace (see the comment above). */
	pcb->pcb_r12 = tf->tf_r12;
	pcb->pcb_r13 = tf->tf_r13;
	pcb->pcb_r14 = tf->tf_r14;
	pcb->pcb_r15 = tf->tf_r15;
	pcb->pcb_rbp = tf->tf_rbp;
	pcb->pcb_rbx = tf->tf_rbx;
	pcb->pcb_rip = tf->tf_rip;
	pcb->pcb_rsp = tf->tf_rsp;
}

/* Set a traced thread's instruction pointer.  Always succeeds. */
int
ptrace_set_pc(struct thread *td, unsigned long addr)
{
	td->td_frame->tf_rip = addr;
	return (0);
}

/* Request a single-step trap by setting PSL_T in the saved rflags. */
int
ptrace_single_step(struct thread *td)
{
	td->td_frame->tf_rflags |= PSL_T;
	return (0);
}

/* Clear a previously requested single-step trap. */
int
ptrace_clear_single_step(struct thread *td)
{
	td->td_frame->tf_rflags &= ~PSL_T;
	return (0);
}

/* Export a thread's trapframe as a struct reg. */
int
fill_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tp;

	tp = td->td_frame;
	return (fill_frame_regs(tp, regs));
}

/*
 * Copy a trapframe into a struct reg.  Segment registers are only valid
 * when the frame carries TF_HASSEGS; otherwise they are reported as 0.
 */
int
fill_frame_regs(struct trapframe *tp, struct reg *regs)
{
	regs->r_r15 = tp->tf_r15;
	regs->r_r14 = tp->tf_r14;
	regs->r_r13 = tp->tf_r13;
	regs->r_r12 = tp->tf_r12;
	regs->r_r11 = tp->tf_r11;
	regs->r_r10 = tp->tf_r10;
	regs->r_r9  = tp->tf_r9;
	regs->r_r8  = tp->tf_r8;
	regs->r_rdi = tp->tf_rdi;
	regs->r_rsi = tp->tf_rsi;
	regs->r_rbp = tp->tf_rbp;
	regs->r_rbx = tp->tf_rbx;
	regs->r_rdx = tp->tf_rdx;
	regs->r_rcx = tp->tf_rcx;
	regs->r_rax = tp->tf_rax;
	regs->r_rip = tp->tf_rip;
	regs->r_cs = tp->tf_cs;
	regs->r_rflags = tp->tf_rflags;
	regs->r_rsp = tp->tf_rsp;
	regs->r_ss = tp->tf_ss;
	if (tp->tf_flags & TF_HASSEGS) {
		regs->r_ds = tp->tf_ds;
		regs->r_es = tp->tf_es;
		regs->r_fs = tp->tf_fs;
		regs->r_gs = tp->tf_gs;
	} else {
		regs->r_ds = 0;
		regs->r_es = 0;
		regs->r_fs = 0;
		regs->r_gs = 0;
	}
	return (0);
}

/*
 * Install a struct reg into a thread's trapframe.  Returns EINVAL when
 * the supplied rflags or %cs would not pass the EFL_SECURE/CS_SECURE
 * checks (i.e. would elevate privilege).
 */
int
set_regs(struct thread *td, struct reg *regs)
{
	struct trapframe *tp;
	register_t rflags;

	tp = td->td_frame;
	/* Truncate to the 32 architectural flag bits before validating. */
	rflags = regs->r_rflags & 0xffffffff;
	if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
		return (EINVAL);
	tp->tf_r15 = regs->r_r15;
	tp->tf_r14 = regs->r_r14;
	tp->tf_r13 = regs->r_r13;
	tp->tf_r12 = regs->r_r12;
	tp->tf_r11 = regs->r_r11;
	tp->tf_r10 = regs->r_r10;
	tp->tf_r9  = regs->r_r9;
	tp->tf_r8  = regs->r_r8;
	tp->tf_rdi = regs->r_rdi;
	tp->tf_rsi = regs->r_rsi;
	tp->tf_rbp = regs->r_rbp;
	tp->tf_rbx = regs->r_rbx;
	tp->tf_rdx = regs->r_rdx;
	tp->tf_rcx = regs->r_rcx;
	tp->tf_rax = regs->r_rax;
	tp->tf_rip = regs->r_rip;
	tp->tf_cs = regs->r_cs;
	tp->tf_rflags = rflags;
	tp->tf_rsp = regs->r_rsp;
	tp->tf_ss = regs->r_ss;
	/* Segment register restore is deliberately disabled. */
	if (0) {	/* XXXKIB */
		tp->tf_ds = regs->r_ds;
		tp->tf_es = regs->r_es;
		tp->tf_fs = regs->r_fs;
		tp->tf_gs = regs->r_gs;
		tp->tf_flags = TF_HASSEGS;
		set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	}
	return (0);
}

/* XXX check all this stuff!
 */
/* externalize from sv_xmm */
static void
fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs)
{
	struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
	struct envxmm *penv_xmm = &sv_xmm->sv_env;
	int i;

	/* pcb -> fpregs */
	bzero(fpregs, sizeof(*fpregs));

	/* FPU control/status */
	penv_fpreg->en_cw = penv_xmm->en_cw;
	penv_fpreg->en_sw = penv_xmm->en_sw;
	penv_fpreg->en_tw = penv_xmm->en_tw;
	penv_fpreg->en_opcode = penv_xmm->en_opcode;
	penv_fpreg->en_rip = penv_xmm->en_rip;
	penv_fpreg->en_rdp = penv_xmm->en_rdp;
	penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr;
	penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask;

	/* FPU registers: 8 x87 registers, 10 bytes copied each. */
	for (i = 0; i < 8; ++i)
		bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10);

	/* SSE registers: 16 xmm registers, 16 bytes each. */
	for (i = 0; i < 16; ++i)
		bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16);
}

/* internalize from fpregs into sv_xmm */
static void
set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm)
{
	struct envxmm *penv_xmm = &sv_xmm->sv_env;
	struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env;
	int i;

	/* fpregs -> pcb */
	/* FPU control/status */
	penv_xmm->en_cw = penv_fpreg->en_cw;
	penv_xmm->en_sw = penv_fpreg->en_sw;
	penv_xmm->en_tw = penv_fpreg->en_tw;
	penv_xmm->en_opcode = penv_fpreg->en_opcode;
	penv_xmm->en_rip = penv_fpreg->en_rip;
	penv_xmm->en_rdp = penv_fpreg->en_rdp;
	penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr;
	/* Mask out MXCSR bits this CPU does not support. */
	penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask;

	/* FPU registers */
	for (i = 0; i < 8; ++i)
		bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10);

	/* SSE registers */
	for (i = 0; i < 16; ++i)
		bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16);
}

/* externalize from td->pcb */
1986int 1987fill_fpregs(struct thread *td, struct fpreg *fpregs) 1988{ 1989 1990 KASSERT(td == curthread || TD_IS_SUSPENDED(td), 1991 ("not suspended thread %p", td)); 1992 fpugetregs(td); 1993 fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs); 1994 return (0); 1995} 1996 1997/* internalize to td->pcb */ 1998int 1999set_fpregs(struct thread *td, struct fpreg *fpregs) 2000{ 2001 2002 set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save); 2003 fpuuserinited(td); 2004 return (0); 2005} 2006 2007/* 2008 * Get machine context. 2009 */ 2010int 2011get_mcontext(struct thread *td, mcontext_t *mcp, int flags) 2012{ 2013 struct pcb *pcb; 2014 struct trapframe *tp; 2015 2016 pcb = td->td_pcb; 2017 tp = td->td_frame; 2018 PROC_LOCK(curthread->td_proc); 2019 mcp->mc_onstack = sigonstack(tp->tf_rsp); 2020 PROC_UNLOCK(curthread->td_proc); 2021 mcp->mc_r15 = tp->tf_r15; 2022 mcp->mc_r14 = tp->tf_r14; 2023 mcp->mc_r13 = tp->tf_r13; 2024 mcp->mc_r12 = tp->tf_r12; 2025 mcp->mc_r11 = tp->tf_r11; 2026 mcp->mc_r10 = tp->tf_r10; 2027 mcp->mc_r9 = tp->tf_r9; 2028 mcp->mc_r8 = tp->tf_r8; 2029 mcp->mc_rdi = tp->tf_rdi; 2030 mcp->mc_rsi = tp->tf_rsi; 2031 mcp->mc_rbp = tp->tf_rbp; 2032 mcp->mc_rbx = tp->tf_rbx; 2033 mcp->mc_rcx = tp->tf_rcx; 2034 mcp->mc_rflags = tp->tf_rflags; 2035 if (flags & GET_MC_CLEAR_RET) { 2036 mcp->mc_rax = 0; 2037 mcp->mc_rdx = 0; 2038 mcp->mc_rflags &= ~PSL_C; 2039 } else { 2040 mcp->mc_rax = tp->tf_rax; 2041 mcp->mc_rdx = tp->tf_rdx; 2042 } 2043 mcp->mc_rip = tp->tf_rip; 2044 mcp->mc_cs = tp->tf_cs; 2045 mcp->mc_rsp = tp->tf_rsp; 2046 mcp->mc_ss = tp->tf_ss; 2047 mcp->mc_ds = tp->tf_ds; 2048 mcp->mc_es = tp->tf_es; 2049 mcp->mc_fs = tp->tf_fs; 2050 mcp->mc_gs = tp->tf_gs; 2051 mcp->mc_flags = tp->tf_flags; 2052 mcp->mc_len = sizeof(*mcp); 2053 get_fpcontext(td, mcp); 2054 mcp->mc_fsbase = pcb->pcb_fsbase; 2055 mcp->mc_gsbase = pcb->pcb_gsbase; 2056 bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); 2057 return (0); 2058} 2059 2060/* 2061 * Set machine context. 
 *
 * However, we don't set any but the user modifiable flags, and we won't
 * touch the cs selector.
 */
int
set_mcontext(struct thread *td, const mcontext_t *mcp)
{
	struct pcb *pcb;
	struct trapframe *tp;
	long rflags;
	int ret;

	pcb = td->td_pcb;
	tp = td->td_frame;
	/* Reject contexts of the wrong size or with unknown flag bits. */
	if (mcp->mc_len != sizeof(*mcp) ||
	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
		return (EINVAL);
	/* Take only the user-changeable rflags bits from the context. */
	rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
	    (tp->tf_rflags & ~PSL_USERCHANGE);
	/*
	 * Install the FPU state first: if it fails, the trapframe has
	 * not been touched yet and the old context remains intact.
	 */
	ret = set_fpcontext(td, mcp);
	if (ret != 0)
		return (ret);
	tp->tf_r15 = mcp->mc_r15;
	tp->tf_r14 = mcp->mc_r14;
	tp->tf_r13 = mcp->mc_r13;
	tp->tf_r12 = mcp->mc_r12;
	tp->tf_r11 = mcp->mc_r11;
	tp->tf_r10 = mcp->mc_r10;
	tp->tf_r9 = mcp->mc_r9;
	tp->tf_r8 = mcp->mc_r8;
	tp->tf_rdi = mcp->mc_rdi;
	tp->tf_rsi = mcp->mc_rsi;
	tp->tf_rbp = mcp->mc_rbp;
	tp->tf_rbx = mcp->mc_rbx;
	tp->tf_rdx = mcp->mc_rdx;
	tp->tf_rcx = mcp->mc_rcx;
	tp->tf_rax = mcp->mc_rax;
	tp->tf_rip = mcp->mc_rip;
	tp->tf_rflags = rflags;
	tp->tf_rsp = mcp->mc_rsp;
	tp->tf_ss = mcp->mc_ss;
	tp->tf_flags = mcp->mc_flags;
	/* Segment registers are only restored if the context carries them. */
	if (tp->tf_flags & TF_HASSEGS) {
		tp->tf_ds = mcp->mc_ds;
		tp->tf_es = mcp->mc_es;
		tp->tf_fs = mcp->mc_fs;
		tp->tf_gs = mcp->mc_gs;
	}
	if (mcp->mc_flags & _MC_HASBASES) {
		pcb->pcb_fsbase = mcp->mc_fsbase;
		pcb->pcb_gsbase = mcp->mc_gsbase;
	}
	/* Force a full iret so the new segment state is actually loaded. */
	set_pcb_flags(pcb, PCB_FULL_IRET);
	return (0);
}

/* Copy the thread's current FPU state and format into the mcontext. */
static void
get_fpcontext(struct thread *td, mcontext_t *mcp)
{

	mcp->mc_ownedfp = fpugetregs(td);
	bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate,
	    sizeof(mcp->mc_fpstate));
	mcp->mc_fpformat = fpuformat();
}

/* Install the mcontext's FPU state into the thread; 0 or EINVAL. */
static int
set_fpcontext(struct thread *td, const mcontext_t *mcp)
{
	struct savefpu *fpstate;

	if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
		return (0);
	else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
		return (EINVAL);
	else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
		/* We don't care what state is left in the FPU or PCB. */
		fpstate_drop(td);
	else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
	    mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
		/*
		 * NOTE(review): casts away const — scrubs reserved MXCSR
		 * bits in the caller's copy of the context before loading.
		 */
		fpstate = (struct savefpu *)&mcp->mc_fpstate;
		fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
		fpusetregs(td, fpstate);
	} else
		return (EINVAL);
	return (0);
}

void
fpstate_drop(struct thread *td)
{

	KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
	critical_enter();
	if (PCPU_GET(fpcurthread) == td)
		fpudrop();
	/*
	 * XXX force a full drop of the fpu.  The above only drops it if we
	 * owned it.
	 *
	 * XXX I don't much like fpugetuserregs()'s semantics of doing a full
	 * drop.  Dropping only to the pcb matches fnsave's behaviour.
	 * We only need to drop to !PCB_INITDONE in sendsig().  But
	 * sendsig() is the only caller of fpugetuserregs()... perhaps we just
	 * have too many layers.
	 *
	 * NOTE(review): clears flags on curthread's pcb, not td's — looks
	 * like this is only ever called with td == curthread; confirm.
	 */
	clear_pcb_flags(curthread->td_pcb,
	    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
	critical_exit();
}

/* Externalize debug registers: td == NULL reads the live CPU registers. */
int
fill_dbregs(struct thread *td, struct dbreg *dbregs)
{
	struct pcb *pcb;

	if (td == NULL) {
		dbregs->dr[0] = rdr0();
		dbregs->dr[1] = rdr1();
		dbregs->dr[2] = rdr2();
		dbregs->dr[3] = rdr3();
		dbregs->dr[6] = rdr6();
		dbregs->dr[7] = rdr7();
	} else {
		pcb = td->td_pcb;
		dbregs->dr[0] = pcb->pcb_dr0;
		dbregs->dr[1] = pcb->pcb_dr1;
		dbregs->dr[2] = pcb->pcb_dr2;
		dbregs->dr[3] = pcb->pcb_dr3;
		dbregs->dr[6] = pcb->pcb_dr6;
		dbregs->dr[7] = pcb->pcb_dr7;
	}
	/* dr4/dr5 are aliases for dr6/dr7; dr8-dr15 do not exist. */
	dbregs->dr[4] = 0;
	dbregs->dr[5] = 0;
	dbregs->dr[8] = 0;
	dbregs->dr[9] = 0;
	dbregs->dr[10] = 0;
	dbregs->dr[11] = 0;
	dbregs->dr[12] = 0;
	dbregs->dr[13] = 0;
	dbregs->dr[14] = 0;
	dbregs->dr[15] = 0;
	return (0);
}

/* Internalize debug registers: td == NULL loads the live CPU registers. */
int
set_dbregs(struct thread *td, struct dbreg *dbregs)
{
	struct pcb *pcb;
	int i;

	if (td == NULL) {
		load_dr0(dbregs->dr[0]);
		load_dr1(dbregs->dr[1]);
		load_dr2(dbregs->dr[2]);
		load_dr3(dbregs->dr[3]);
		load_dr6(dbregs->dr[6]);
		load_dr7(dbregs->dr[7]);
	} else {
		/*
		 * Don't let an illegal value for dr7 get set.  Specifically,
		 * check for undefined settings.  Setting these bit patterns
		 * result in undefined behaviour and can lead to an unexpected
		 * TRCTRAP or a general protection fault right here.
		 * Upper bits of dr6 and dr7 must not be set
		 */
		for (i = 0; i < 4; i++) {
			if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02)
				return (EINVAL);
			if (td->td_frame->tf_cs == _ucode32sel &&
			    DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8)
				return (EINVAL);
		}
		if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 ||
		    (dbregs->dr[7] & 0xffffffff00000000ul) != 0)
			return (EINVAL);

		pcb = td->td_pcb;

		/*
		 * Don't let a process set a breakpoint that is not within the
		 * process's address space.  If a process could do this, it
		 * could halt the system by setting a breakpoint in the kernel
		 * (if ddb was enabled).  Thus, we need to check to make sure
		 * that no breakpoints are being enabled for addresses outside
		 * process's address space.
		 *
		 * XXX - what about when the watched area of the user's
		 * address space is written into from within the kernel
		 * ... wouldn't that still cause a breakpoint to be generated
		 * from within kernel mode?
		 */

		if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) {
			/* dr0 is enabled */
			if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS)
				return (EINVAL);
		}
		if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) {
			/* dr1 is enabled */
			if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS)
				return (EINVAL);
		}
		if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) {
			/* dr2 is enabled */
			if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS)
				return (EINVAL);
		}
		if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) {
			/* dr3 is enabled */
			if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS)
				return (EINVAL);
		}

		pcb->pcb_dr0 = dbregs->dr[0];
		pcb->pcb_dr1 = dbregs->dr[1];
		pcb->pcb_dr2 = dbregs->dr[2];
		pcb->pcb_dr3 = dbregs->dr[3];
		pcb->pcb_dr6 = dbregs->dr[6];
		pcb->pcb_dr7 = dbregs->dr[7];

		/* Mark the pcb so the context switch path reloads them. */
		set_pcb_flags(pcb, PCB_DBREGS);
	}

	return (0);
}

void
reset_dbregs(void)
{

	load_dr7(0);	/* Turn off the control bits first */
	load_dr0(0);
	load_dr1(0);
	load_dr2(0);
	load_dr3(0);
	load_dr6(0);
}

/*
 * Return > 0 if a hardware breakpoint has been hit, and the
 * breakpoint was in user space.  Return 0, otherwise.
2304 */ 2305int 2306user_dbreg_trap(void) 2307{ 2308 u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ 2309 u_int64_t bp; /* breakpoint bits extracted from dr6 */ 2310 int nbp; /* number of breakpoints that triggered */ 2311 caddr_t addr[4]; /* breakpoint addresses */ 2312 int i; 2313 2314 dr7 = rdr7(); 2315 if ((dr7 & 0x000000ff) == 0) { 2316 /* 2317 * all GE and LE bits in the dr7 register are zero, 2318 * thus the trap couldn't have been caused by the 2319 * hardware debug registers 2320 */ 2321 return 0; 2322 } 2323 2324 nbp = 0; 2325 dr6 = rdr6(); 2326 bp = dr6 & 0x0000000f; 2327 2328 if (!bp) { 2329 /* 2330 * None of the breakpoint bits are set meaning this 2331 * trap was not caused by any of the debug registers 2332 */ 2333 return 0; 2334 } 2335 2336 /* 2337 * at least one of the breakpoints were hit, check to see 2338 * which ones and if any of them are user space addresses 2339 */ 2340 2341 if (bp & 0x01) { 2342 addr[nbp++] = (caddr_t)rdr0(); 2343 } 2344 if (bp & 0x02) { 2345 addr[nbp++] = (caddr_t)rdr1(); 2346 } 2347 if (bp & 0x04) { 2348 addr[nbp++] = (caddr_t)rdr2(); 2349 } 2350 if (bp & 0x08) { 2351 addr[nbp++] = (caddr_t)rdr3(); 2352 } 2353 2354 for (i = 0; i < nbp; i++) { 2355 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { 2356 /* 2357 * addr[i] is in user space 2358 */ 2359 return nbp; 2360 } 2361 } 2362 2363 /* 2364 * None of the breakpoints are in user space. 2365 */ 2366 return 0; 2367} 2368 2369#ifdef KDB 2370 2371/* 2372 * Provide inb() and outb() as functions. They are normally only available as 2373 * inline functions, thus cannot be called from the debugger. 2374 */ 2375 2376/* silence compiler warnings */ 2377u_char inb_(u_short); 2378void outb_(u_short, u_char); 2379 2380u_char 2381inb_(u_short port) 2382{ 2383 return inb(port); 2384} 2385 2386void 2387outb_(u_short port, u_char data) 2388{ 2389 outb(port, data); 2390} 2391 2392#endif /* KDB */ 2393