/* machdep.c revision 291948 */
1228753Smm/*- 2228753Smm * Copyright (c) 2003 Peter Wemm. 3228753Smm * Copyright (c) 1992 Terrence R. Lambert. 4228753Smm * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 5228753Smm * All rights reserved. 6228753Smm * 7228753Smm * This code is derived from software contributed to Berkeley by 8228753Smm * William Jolitz. 9228753Smm * 10228753Smm * Redistribution and use in source and binary forms, with or without 11228753Smm * modification, are permitted provided that the following conditions 12228753Smm * are met: 13228753Smm * 1. Redistributions of source code must retain the above copyright 14228753Smm * notice, this list of conditions and the following disclaimer. 15228753Smm * 2. Redistributions in binary form must reproduce the above copyright 16228753Smm * notice, this list of conditions and the following disclaimer in the 17228753Smm * documentation and/or other materials provided with the distribution. 18228753Smm * 3. All advertising materials mentioning features or use of this software 19228753Smm * must display the following acknowledgement: 20228753Smm * This product includes software developed by the University of 21228753Smm * California, Berkeley and its contributors. 22228753Smm * 4. Neither the name of the University nor the names of its contributors 23228753Smm * may be used to endorse or promote products derived from this software 24228753Smm * without specific prior written permission. 25228753Smm * 26228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28232153Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29232153Smm * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30232153Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36232153Smm * SUCH DAMAGE. 37232153Smm * 38232153Smm * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 39228753Smm */ 40228753Smm 41228753Smm#include <sys/cdefs.h> 42228753Smm__FBSDID("$FreeBSD: head/sys/amd64/amd64/machdep.c 291948 2015-12-07 17:24:55Z kib $"); 43228753Smm 44228753Smm#include "opt_atpic.h" 45228753Smm#include "opt_compat.h" 46228753Smm#include "opt_cpu.h" 47228753Smm#include "opt_ddb.h" 48228753Smm#include "opt_inet.h" 49228753Smm#include "opt_isa.h" 50228753Smm#include "opt_kstack_pages.h" 51228753Smm#include "opt_maxmem.h" 52228753Smm#include "opt_mp_watchdog.h" 53228753Smm#include "opt_perfmon.h" 54228753Smm#include "opt_platform.h" 55228753Smm#include "opt_sched.h" 56228753Smm 57228753Smm#include <sys/param.h> 58228753Smm#include <sys/proc.h> 59228753Smm#include <sys/systm.h> 60228753Smm#include <sys/bio.h> 61228753Smm#include <sys/buf.h> 62228753Smm#include <sys/bus.h> 63228753Smm#include <sys/callout.h> 64228753Smm#include <sys/cons.h> 65228753Smm#include <sys/cpu.h> 66228753Smm#include <sys/efi.h> 67228753Smm#include <sys/eventhandler.h> 68228753Smm#include <sys/exec.h> 69228753Smm#include <sys/imgact.h> 70228753Smm#include <sys/kdb.h> 71228753Smm#include <sys/kernel.h> 72228753Smm#include <sys/ktr.h> 73228753Smm#include <sys/linker.h> 74228753Smm#include <sys/lock.h> 75228753Smm#include <sys/malloc.h> 76228753Smm#include <sys/memrange.h> 77228753Smm#include <sys/msgbuf.h> 
78228753Smm#include <sys/mutex.h> 79228753Smm#include <sys/pcpu.h> 80228753Smm#include <sys/ptrace.h> 81228753Smm#include <sys/reboot.h> 82228753Smm#include <sys/rwlock.h> 83228753Smm#include <sys/sched.h> 84232153Smm#include <sys/signalvar.h> 85232153Smm#ifdef SMP 86232153Smm#include <sys/smp.h> 87232153Smm#endif 88232153Smm#include <sys/syscallsubr.h> 89232153Smm#include <sys/sysctl.h> 90232153Smm#include <sys/sysent.h> 91232153Smm#include <sys/sysproto.h> 92232153Smm#include <sys/ucontext.h> 93232153Smm#include <sys/vmmeter.h> 94232153Smm 95232153Smm#include <vm/vm.h> 96232153Smm#include <vm/vm_extern.h> 97232153Smm#include <vm/vm_kern.h> 98228753Smm#include <vm/vm_page.h> 99228753Smm#include <vm/vm_map.h> 100228753Smm#include <vm/vm_object.h> 101228753Smm#include <vm/vm_pager.h> 102228753Smm#include <vm/vm_param.h> 103228753Smm 104228753Smm#ifdef DDB 105#ifndef KDB 106#error KDB must be enabled in order for DDB to work! 107#endif 108#include <ddb/ddb.h> 109#include <ddb/db_sym.h> 110#endif 111 112#include <net/netisr.h> 113 114#include <machine/clock.h> 115#include <machine/cpu.h> 116#include <machine/cputypes.h> 117#include <machine/intr_machdep.h> 118#include <x86/mca.h> 119#include <machine/md_var.h> 120#include <machine/metadata.h> 121#include <machine/mp_watchdog.h> 122#include <machine/pc/bios.h> 123#include <machine/pcb.h> 124#include <machine/proc.h> 125#include <machine/reg.h> 126#include <machine/sigframe.h> 127#include <machine/specialreg.h> 128#ifdef PERFMON 129#include <machine/perfmon.h> 130#endif 131#include <machine/tss.h> 132#ifdef SMP 133#include <machine/smp.h> 134#endif 135#ifdef FDT 136#include <x86/fdt.h> 137#endif 138 139#ifdef DEV_ATPIC 140#include <x86/isa/icu.h> 141#else 142#include <x86/apicvar.h> 143#endif 144 145#include <isa/isareg.h> 146#include <isa/rtc.h> 147#include <x86/init.h> 148 149/* Sanity check for __curthread() */ 150CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); 151 152extern u_int64_t hammer_time(u_int64_t, 
u_int64_t); 153 154#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 155#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 156 157static void cpu_startup(void *); 158static void get_fpcontext(struct thread *td, mcontext_t *mcp, 159 char *xfpusave, size_t xfpusave_len); 160static int set_fpcontext(struct thread *td, mcontext_t *mcp, 161 char *xfpustate, size_t xfpustate_len); 162SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); 163 164/* Preload data parse function */ 165static caddr_t native_parse_preload_data(u_int64_t); 166 167/* Native function to fetch and parse the e820 map */ 168static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); 169 170/* Default init_ops implementation. */ 171struct init_ops init_ops = { 172 .parse_preload_data = native_parse_preload_data, 173 .early_clock_source_init = i8254_init, 174 .early_delay = i8254_delay, 175 .parse_memmap = native_parse_memmap, 176#ifdef SMP 177 .mp_bootaddress = mp_bootaddress, 178 .start_all_aps = native_start_all_aps, 179#endif 180 .msi_init = msi_init, 181}; 182 183/* 184 * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its value is 185 * the physical address at which the kernel is loaded. 186 */ 187extern char kernphys[]; 188 189struct msgbuf *msgbufp; 190 191/* Intel ICH registers */ 192#define ICH_PMBASE 0x400 193#define ICH_SMI_EN ICH_PMBASE + 0x30 194 195int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; 196 197int cold = 1; 198 199long Maxmem = 0; 200long realmem = 0; 201 202/* 203 * The number of PHYSMAP entries must be one less than the number of 204 * PHYSSEG entries because the PHYSMAP entry that spans the largest 205 * physical address that is accessible by ISA DMA is split into two 206 * PHYSSEG entries. 
207 */ 208#define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) 209 210vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; 211vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; 212 213/* must be 2 less so 0 0 can signal end of chunks */ 214#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) 215#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) 216 217struct kva_md_info kmi; 218 219static struct trapframe proc0_tf; 220struct region_descriptor r_gdt, r_idt; 221 222struct pcpu __pcpu[MAXCPU]; 223 224struct mtx icu_lock; 225 226struct mem_range_softc mem_range_softc; 227 228struct mtx dt_lock; /* lock for GDT and LDT */ 229 230void (*vmm_resume_p)(void); 231 232static void 233cpu_startup(dummy) 234 void *dummy; 235{ 236 uintmax_t memsize; 237 char *sysenv; 238 239 /* 240 * On MacBooks, we need to disallow the legacy USB circuit to 241 * generate an SMI# because this can cause several problems, 242 * namely: incorrect CPU frequency detection and failure to 243 * start the APs. 244 * We do this by disabling a bit in the SMI_EN (SMI Control and 245 * Enable register) of the Intel ICH LPC Interface Bridge. 246 */ 247 sysenv = kern_getenv("smbios.system.product"); 248 if (sysenv != NULL) { 249 if (strncmp(sysenv, "MacBook1,1", 10) == 0 || 250 strncmp(sysenv, "MacBook3,1", 10) == 0 || 251 strncmp(sysenv, "MacBook4,1", 10) == 0 || 252 strncmp(sysenv, "MacBookPro1,1", 13) == 0 || 253 strncmp(sysenv, "MacBookPro1,2", 13) == 0 || 254 strncmp(sysenv, "MacBookPro3,1", 13) == 0 || 255 strncmp(sysenv, "MacBookPro4,1", 13) == 0 || 256 strncmp(sysenv, "Macmini1,1", 10) == 0) { 257 if (bootverbose) 258 printf("Disabling LEGACY_USB_EN bit on " 259 "Intel ICH.\n"); 260 outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); 261 } 262 freeenv(sysenv); 263 } 264 265 /* 266 * Good {morning,afternoon,evening,night}. 
267 */ 268 startrtclock(); 269 printcpuinfo(); 270 panicifcpuunsupported(); 271#ifdef PERFMON 272 perfmon_init(); 273#endif 274 275 /* 276 * Display physical memory if SMBIOS reports reasonable amount. 277 */ 278 memsize = 0; 279 sysenv = kern_getenv("smbios.memory.enabled"); 280 if (sysenv != NULL) { 281 memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; 282 freeenv(sysenv); 283 } 284 if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) 285 memsize = ptoa((uintmax_t)Maxmem); 286 printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); 287 realmem = atop(memsize); 288 289 /* 290 * Display any holes after the first chunk of extended memory. 291 */ 292 if (bootverbose) { 293 int indx; 294 295 printf("Physical memory chunk(s):\n"); 296 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 297 vm_paddr_t size; 298 299 size = phys_avail[indx + 1] - phys_avail[indx]; 300 printf( 301 "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", 302 (uintmax_t)phys_avail[indx], 303 (uintmax_t)phys_avail[indx + 1] - 1, 304 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); 305 } 306 } 307 308 vm_ksubmap_init(&kmi); 309 310 printf("avail memory = %ju (%ju MB)\n", 311 ptoa((uintmax_t)vm_cnt.v_free_count), 312 ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); 313 314 /* 315 * Set up buffers, so they can be used to read disk labels. 316 */ 317 bufinit(); 318 vm_pager_bufferinit(); 319 320 cpu_setregs(); 321} 322 323/* 324 * Send an interrupt to process. 325 * 326 * Stack is set up to allow sigcode stored 327 * at top to call routine, followed by call 328 * to sigreturn routine below. After sigreturn 329 * resets the signal mask, the stack, and the 330 * frame pointer, it returns to the user 331 * specified pc, psl. 
332 */ 333void 334sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 335{ 336 struct sigframe sf, *sfp; 337 struct pcb *pcb; 338 struct proc *p; 339 struct thread *td; 340 struct sigacts *psp; 341 char *sp; 342 struct trapframe *regs; 343 char *xfpusave; 344 size_t xfpusave_len; 345 int sig; 346 int oonstack; 347 348 td = curthread; 349 pcb = td->td_pcb; 350 p = td->td_proc; 351 PROC_LOCK_ASSERT(p, MA_OWNED); 352 sig = ksi->ksi_signo; 353 psp = p->p_sigacts; 354 mtx_assert(&psp->ps_mtx, MA_OWNED); 355 regs = td->td_frame; 356 oonstack = sigonstack(regs->tf_rsp); 357 358 if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { 359 xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); 360 xfpusave = __builtin_alloca(xfpusave_len); 361 } else { 362 xfpusave_len = 0; 363 xfpusave = NULL; 364 } 365 366 /* Save user context. */ 367 bzero(&sf, sizeof(sf)); 368 sf.sf_uc.uc_sigmask = *mask; 369 sf.sf_uc.uc_stack = td->td_sigstk; 370 sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 371 ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; 372 sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; 373 bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); 374 sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ 375 get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); 376 fpstate_drop(td); 377 sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; 378 sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; 379 bzero(sf.sf_uc.uc_mcontext.mc_spare, 380 sizeof(sf.sf_uc.uc_mcontext.mc_spare)); 381 bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); 382 383 /* Allocate space for the signal handler context. 
*/ 384 if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && 385 SIGISMEMBER(psp->ps_sigonstack, sig)) { 386 sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; 387#if defined(COMPAT_43) 388 td->td_sigstk.ss_flags |= SS_ONSTACK; 389#endif 390 } else 391 sp = (char *)regs->tf_rsp - 128; 392 if (xfpusave != NULL) { 393 sp -= xfpusave_len; 394 sp = (char *)((unsigned long)sp & ~0x3Ful); 395 sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; 396 } 397 sp -= sizeof(struct sigframe); 398 /* Align to 16 bytes. */ 399 sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); 400 401 /* Build the argument list for the signal handler. */ 402 regs->tf_rdi = sig; /* arg 1 in %rdi */ 403 regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ 404 bzero(&sf.sf_si, sizeof(sf.sf_si)); 405 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 406 /* Signal handler installed with SA_SIGINFO. */ 407 regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ 408 sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; 409 410 /* Fill in POSIX parts */ 411 sf.sf_si = ksi->ksi_info; 412 sf.sf_si.si_signo = sig; /* maybe a translated signal */ 413 regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ 414 } else { 415 /* Old FreeBSD-style arguments. */ 416 regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ 417 regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ 418 sf.sf_ahu.sf_handler = catcher; 419 } 420 mtx_unlock(&psp->ps_mtx); 421 PROC_UNLOCK(p); 422 423 /* 424 * Copy the sigframe out to the user's stack. 
425 */ 426 if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || 427 (xfpusave != NULL && copyout(xfpusave, 428 (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) 429 != 0)) { 430#ifdef DEBUG 431 printf("process %ld has trashed its stack\n", (long)p->p_pid); 432#endif 433 PROC_LOCK(p); 434 sigexit(td, SIGILL); 435 } 436 437 regs->tf_rsp = (long)sfp; 438 regs->tf_rip = p->p_sysent->sv_sigcode_base; 439 regs->tf_rflags &= ~(PSL_T | PSL_D); 440 regs->tf_cs = _ucodesel; 441 regs->tf_ds = _udatasel; 442 regs->tf_ss = _udatasel; 443 regs->tf_es = _udatasel; 444 regs->tf_fs = _ufssel; 445 regs->tf_gs = _ugssel; 446 regs->tf_flags = TF_HASSEGS; 447 set_pcb_flags(pcb, PCB_FULL_IRET); 448 PROC_LOCK(p); 449 mtx_lock(&psp->ps_mtx); 450} 451 452/* 453 * System call to cleanup state after a signal 454 * has been taken. Reset signal mask and 455 * stack state from context left by sendsig (above). 456 * Return to previous pc and psl as specified by 457 * context left by sendsig. Check carefully to 458 * make sure that the user has not modified the 459 * state to gain improper privileges. 
460 * 461 * MPSAFE 462 */ 463int 464sys_sigreturn(td, uap) 465 struct thread *td; 466 struct sigreturn_args /* { 467 const struct __ucontext *sigcntxp; 468 } */ *uap; 469{ 470 ucontext_t uc; 471 struct pcb *pcb; 472 struct proc *p; 473 struct trapframe *regs; 474 ucontext_t *ucp; 475 char *xfpustate; 476 size_t xfpustate_len; 477 long rflags; 478 int cs, error, ret; 479 ksiginfo_t ksi; 480 481 pcb = td->td_pcb; 482 p = td->td_proc; 483 484 error = copyin(uap->sigcntxp, &uc, sizeof(uc)); 485 if (error != 0) { 486 uprintf("pid %d (%s): sigreturn copyin failed\n", 487 p->p_pid, td->td_name); 488 return (error); 489 } 490 ucp = &uc; 491 if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { 492 uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, 493 td->td_name, ucp->uc_mcontext.mc_flags); 494 return (EINVAL); 495 } 496 regs = td->td_frame; 497 rflags = ucp->uc_mcontext.mc_rflags; 498 /* 499 * Don't allow users to change privileged or reserved flags. 500 */ 501 if (!EFL_SECURE(rflags, regs->tf_rflags)) { 502 uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid, 503 td->td_name, rflags); 504 return (EINVAL); 505 } 506 507 /* 508 * Don't allow users to load a valid privileged %cs. Let the 509 * hardware check for invalid selectors, excess privilege in 510 * other selectors, invalid %eip's and invalid %esp's. 
511 */ 512 cs = ucp->uc_mcontext.mc_cs; 513 if (!CS_SECURE(cs)) { 514 uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid, 515 td->td_name, cs); 516 ksiginfo_init_trap(&ksi); 517 ksi.ksi_signo = SIGBUS; 518 ksi.ksi_code = BUS_OBJERR; 519 ksi.ksi_trapno = T_PROTFLT; 520 ksi.ksi_addr = (void *)regs->tf_rip; 521 trapsignal(td, &ksi); 522 return (EINVAL); 523 } 524 525 if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { 526 xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; 527 if (xfpustate_len > cpu_max_ext_state_size - 528 sizeof(struct savefpu)) { 529 uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", 530 p->p_pid, td->td_name, xfpustate_len); 531 return (EINVAL); 532 } 533 xfpustate = __builtin_alloca(xfpustate_len); 534 error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, 535 xfpustate, xfpustate_len); 536 if (error != 0) { 537 uprintf( 538 "pid %d (%s): sigreturn copying xfpustate failed\n", 539 p->p_pid, td->td_name); 540 return (error); 541 } 542 } else { 543 xfpustate = NULL; 544 xfpustate_len = 0; 545 } 546 ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); 547 if (ret != 0) { 548 uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", 549 p->p_pid, td->td_name, ret); 550 return (ret); 551 } 552 bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); 553 pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; 554 pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; 555 556#if defined(COMPAT_43) 557 if (ucp->uc_mcontext.mc_onstack & 1) 558 td->td_sigstk.ss_flags |= SS_ONSTACK; 559 else 560 td->td_sigstk.ss_flags &= ~SS_ONSTACK; 561#endif 562 563 kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); 564 set_pcb_flags(pcb, PCB_FULL_IRET); 565 return (EJUSTRETURN); 566} 567 568#ifdef COMPAT_FREEBSD4 569int 570freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) 571{ 572 573 return sys_sigreturn(td, (struct sigreturn_args *)uap); 574} 575#endif 576 577/* 578 * Reset registers to default values on exec. 
579 */ 580void 581exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) 582{ 583 struct trapframe *regs = td->td_frame; 584 struct pcb *pcb = td->td_pcb; 585 586 mtx_lock(&dt_lock); 587 if (td->td_proc->p_md.md_ldt != NULL) 588 user_ldt_free(td); 589 else 590 mtx_unlock(&dt_lock); 591 592 pcb->pcb_fsbase = 0; 593 pcb->pcb_gsbase = 0; 594 clear_pcb_flags(pcb, PCB_32BIT); 595 pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; 596 set_pcb_flags(pcb, PCB_FULL_IRET); 597 598 bzero((char *)regs, sizeof(struct trapframe)); 599 regs->tf_rip = imgp->entry_addr; 600 regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; 601 regs->tf_rdi = stack; /* argv */ 602 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 603 regs->tf_ss = _udatasel; 604 regs->tf_cs = _ucodesel; 605 regs->tf_ds = _udatasel; 606 regs->tf_es = _udatasel; 607 regs->tf_fs = _ufssel; 608 regs->tf_gs = _ugssel; 609 regs->tf_flags = TF_HASSEGS; 610 td->td_retval[1] = 0; 611 612 /* 613 * Reset the hardware debug registers if they were in use. 614 * They won't have any meaning for the newly exec'd process. 615 */ 616 if (pcb->pcb_flags & PCB_DBREGS) { 617 pcb->pcb_dr0 = 0; 618 pcb->pcb_dr1 = 0; 619 pcb->pcb_dr2 = 0; 620 pcb->pcb_dr3 = 0; 621 pcb->pcb_dr6 = 0; 622 pcb->pcb_dr7 = 0; 623 if (pcb == curpcb) { 624 /* 625 * Clear the debug registers on the running 626 * CPU, otherwise they will end up affecting 627 * the next process we switch to. 628 */ 629 reset_dbregs(); 630 } 631 clear_pcb_flags(pcb, PCB_DBREGS); 632 } 633 634 /* 635 * Drop the FP state if we hold it, so that the process gets a 636 * clean FP state if it uses the FPU again. 637 */ 638 fpstate_drop(td); 639} 640 641void 642cpu_setregs(void) 643{ 644 register_t cr0; 645 646 cr0 = rcr0(); 647 /* 648 * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the 649 * BSP. See the comments there about why we set them. 
650 */ 651 cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; 652 load_cr0(cr0); 653} 654 655/* 656 * Initialize amd64 and configure to run kernel 657 */ 658 659/* 660 * Initialize segments & interrupt table 661 */ 662 663struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */ 664static struct gate_descriptor idt0[NIDT]; 665struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ 666 667static char dblfault_stack[PAGE_SIZE] __aligned(16); 668 669static char nmi0_stack[PAGE_SIZE] __aligned(16); 670CTASSERT(sizeof(struct nmi_pcpu) == 16); 671 672struct amd64tss common_tss[MAXCPU]; 673 674/* 675 * Software prototypes -- in more palatable form. 676 * 677 * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same 678 * slots as corresponding segments for i386 kernel. 679 */ 680struct soft_segment_descriptor gdt_segs[] = { 681/* GNULL_SEL 0 Null Descriptor */ 682{ .ssd_base = 0x0, 683 .ssd_limit = 0x0, 684 .ssd_type = 0, 685 .ssd_dpl = 0, 686 .ssd_p = 0, 687 .ssd_long = 0, 688 .ssd_def32 = 0, 689 .ssd_gran = 0 }, 690/* GNULL2_SEL 1 Null Descriptor */ 691{ .ssd_base = 0x0, 692 .ssd_limit = 0x0, 693 .ssd_type = 0, 694 .ssd_dpl = 0, 695 .ssd_p = 0, 696 .ssd_long = 0, 697 .ssd_def32 = 0, 698 .ssd_gran = 0 }, 699/* GUFS32_SEL 2 32 bit %gs Descriptor for user */ 700{ .ssd_base = 0x0, 701 .ssd_limit = 0xfffff, 702 .ssd_type = SDT_MEMRWA, 703 .ssd_dpl = SEL_UPL, 704 .ssd_p = 1, 705 .ssd_long = 0, 706 .ssd_def32 = 1, 707 .ssd_gran = 1 }, 708/* GUGS32_SEL 3 32 bit %fs Descriptor for user */ 709{ .ssd_base = 0x0, 710 .ssd_limit = 0xfffff, 711 .ssd_type = SDT_MEMRWA, 712 .ssd_dpl = SEL_UPL, 713 .ssd_p = 1, 714 .ssd_long = 0, 715 .ssd_def32 = 1, 716 .ssd_gran = 1 }, 717/* GCODE_SEL 4 Code Descriptor for kernel */ 718{ .ssd_base = 0x0, 719 .ssd_limit = 0xfffff, 720 .ssd_type = SDT_MEMERA, 721 .ssd_dpl = SEL_KPL, 722 .ssd_p = 1, 723 .ssd_long = 1, 724 .ssd_def32 = 0, 725 .ssd_gran = 1 }, 726/* GDATA_SEL 5 Data Descriptor for kernel */ 727{ .ssd_base 
= 0x0, 728 .ssd_limit = 0xfffff, 729 .ssd_type = SDT_MEMRWA, 730 .ssd_dpl = SEL_KPL, 731 .ssd_p = 1, 732 .ssd_long = 1, 733 .ssd_def32 = 0, 734 .ssd_gran = 1 }, 735/* GUCODE32_SEL 6 32 bit Code Descriptor for user */ 736{ .ssd_base = 0x0, 737 .ssd_limit = 0xfffff, 738 .ssd_type = SDT_MEMERA, 739 .ssd_dpl = SEL_UPL, 740 .ssd_p = 1, 741 .ssd_long = 0, 742 .ssd_def32 = 1, 743 .ssd_gran = 1 }, 744/* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ 745{ .ssd_base = 0x0, 746 .ssd_limit = 0xfffff, 747 .ssd_type = SDT_MEMRWA, 748 .ssd_dpl = SEL_UPL, 749 .ssd_p = 1, 750 .ssd_long = 0, 751 .ssd_def32 = 1, 752 .ssd_gran = 1 }, 753/* GUCODE_SEL 8 64 bit Code Descriptor for user */ 754{ .ssd_base = 0x0, 755 .ssd_limit = 0xfffff, 756 .ssd_type = SDT_MEMERA, 757 .ssd_dpl = SEL_UPL, 758 .ssd_p = 1, 759 .ssd_long = 1, 760 .ssd_def32 = 0, 761 .ssd_gran = 1 }, 762/* GPROC0_SEL 9 Proc 0 Tss Descriptor */ 763{ .ssd_base = 0x0, 764 .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1, 765 .ssd_type = SDT_SYSTSS, 766 .ssd_dpl = SEL_KPL, 767 .ssd_p = 1, 768 .ssd_long = 0, 769 .ssd_def32 = 0, 770 .ssd_gran = 0 }, 771/* Actually, the TSS is a system descriptor which is double size */ 772{ .ssd_base = 0x0, 773 .ssd_limit = 0x0, 774 .ssd_type = 0, 775 .ssd_dpl = 0, 776 .ssd_p = 0, 777 .ssd_long = 0, 778 .ssd_def32 = 0, 779 .ssd_gran = 0 }, 780/* GUSERLDT_SEL 11 LDT Descriptor */ 781{ .ssd_base = 0x0, 782 .ssd_limit = 0x0, 783 .ssd_type = 0, 784 .ssd_dpl = 0, 785 .ssd_p = 0, 786 .ssd_long = 0, 787 .ssd_def32 = 0, 788 .ssd_gran = 0 }, 789/* GUSERLDT_SEL 12 LDT Descriptor, double size */ 790{ .ssd_base = 0x0, 791 .ssd_limit = 0x0, 792 .ssd_type = 0, 793 .ssd_dpl = 0, 794 .ssd_p = 0, 795 .ssd_long = 0, 796 .ssd_def32 = 0, 797 .ssd_gran = 0 }, 798}; 799 800void 801setidt(int idx, inthand_t *func, int typ, int dpl, int ist) 802{ 803 struct gate_descriptor *ip; 804 805 ip = idt + idx; 806 ip->gd_looffset = (uintptr_t)func; 807 ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); 808 ip->gd_ist = 
ist; 809 ip->gd_xx = 0; 810 ip->gd_type = typ; 811 ip->gd_dpl = dpl; 812 ip->gd_p = 1; 813 ip->gd_hioffset = ((uintptr_t)func)>>16 ; 814} 815 816extern inthand_t 817 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 818 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 819 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 820 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 821 IDTVEC(xmm), IDTVEC(dblfault), 822#ifdef KDTRACE_HOOKS 823 IDTVEC(dtrace_ret), 824#endif 825#ifdef XENHVM 826 IDTVEC(xen_intr_upcall), 827#endif 828 IDTVEC(fast_syscall), IDTVEC(fast_syscall32); 829 830#ifdef DDB 831/* 832 * Display the index and function name of any IDT entries that don't use 833 * the default 'rsvd' entry point. 834 */ 835DB_SHOW_COMMAND(idt, db_show_idt) 836{ 837 struct gate_descriptor *ip; 838 int idx; 839 uintptr_t func; 840 841 ip = idt; 842 for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { 843 func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); 844 if (func != (uintptr_t)&IDTVEC(rsvd)) { 845 db_printf("%3d\t", idx); 846 db_printsym(func, DB_STGY_PROC); 847 db_printf("\n"); 848 } 849 ip++; 850 } 851} 852 853/* Show privileged registers. 
*/ 854DB_SHOW_COMMAND(sysregs, db_show_sysregs) 855{ 856 struct { 857 uint16_t limit; 858 uint64_t base; 859 } __packed idtr, gdtr; 860 uint16_t ldt, tr; 861 862 __asm __volatile("sidt %0" : "=m" (idtr)); 863 db_printf("idtr\t0x%016lx/%04x\n", 864 (u_long)idtr.base, (u_int)idtr.limit); 865 __asm __volatile("sgdt %0" : "=m" (gdtr)); 866 db_printf("gdtr\t0x%016lx/%04x\n", 867 (u_long)gdtr.base, (u_int)gdtr.limit); 868 __asm __volatile("sldt %0" : "=r" (ldt)); 869 db_printf("ldtr\t0x%04x\n", ldt); 870 __asm __volatile("str %0" : "=r" (tr)); 871 db_printf("tr\t0x%04x\n", tr); 872 db_printf("cr0\t0x%016lx\n", rcr0()); 873 db_printf("cr2\t0x%016lx\n", rcr2()); 874 db_printf("cr3\t0x%016lx\n", rcr3()); 875 db_printf("cr4\t0x%016lx\n", rcr4()); 876 if (rcr4() & CR4_XSAVE) 877 db_printf("xcr0\t0x%016lx\n", rxcr(0)); 878 db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER)); 879 if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) 880 db_printf("FEATURES_CTL\t%016lx\n", 881 rdmsr(MSR_IA32_FEATURE_CONTROL)); 882 db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); 883 db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT)); 884 db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE)); 885} 886 887DB_SHOW_COMMAND(dbregs, db_show_dbregs) 888{ 889 890 db_printf("dr0\t0x%016lx\n", rdr0()); 891 db_printf("dr1\t0x%016lx\n", rdr1()); 892 db_printf("dr2\t0x%016lx\n", rdr2()); 893 db_printf("dr3\t0x%016lx\n", rdr3()); 894 db_printf("dr6\t0x%016lx\n", rdr6()); 895 db_printf("dr7\t0x%016lx\n", rdr7()); 896} 897#endif 898 899void 900sdtossd(sd, ssd) 901 struct user_segment_descriptor *sd; 902 struct soft_segment_descriptor *ssd; 903{ 904 905 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 906 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 907 ssd->ssd_type = sd->sd_type; 908 ssd->ssd_dpl = sd->sd_dpl; 909 ssd->ssd_p = sd->sd_p; 910 ssd->ssd_long = sd->sd_long; 911 ssd->ssd_def32 = sd->sd_def32; 912 ssd->ssd_gran = sd->sd_gran; 913} 914 915void 916ssdtosd(ssd, sd) 917 struct 
soft_segment_descriptor *ssd; 918 struct user_segment_descriptor *sd; 919{ 920 921 sd->sd_lobase = (ssd->ssd_base) & 0xffffff; 922 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; 923 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; 924 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; 925 sd->sd_type = ssd->ssd_type; 926 sd->sd_dpl = ssd->ssd_dpl; 927 sd->sd_p = ssd->ssd_p; 928 sd->sd_long = ssd->ssd_long; 929 sd->sd_def32 = ssd->ssd_def32; 930 sd->sd_gran = ssd->ssd_gran; 931} 932 933void 934ssdtosyssd(ssd, sd) 935 struct soft_segment_descriptor *ssd; 936 struct system_segment_descriptor *sd; 937{ 938 939 sd->sd_lobase = (ssd->ssd_base) & 0xffffff; 940 sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; 941 sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; 942 sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; 943 sd->sd_type = ssd->ssd_type; 944 sd->sd_dpl = ssd->ssd_dpl; 945 sd->sd_p = ssd->ssd_p; 946 sd->sd_gran = ssd->ssd_gran; 947} 948 949#if !defined(DEV_ATPIC) && defined(DEV_ISA) 950#include <isa/isavar.h> 951#include <isa/isareg.h> 952/* 953 * Return a bitmap of the current interrupt requests. This is 8259-specific 954 * and is only suitable for use at probe time. 955 * This is only here to pacify sio. It is NOT FATAL if this doesn't work. 956 * It shouldn't be here. There should probably be an APIC centric 957 * implementation in the apic driver code, if at all. 958 */ 959intrmask_t 960isa_irq_pending(void) 961{ 962 u_char irr1; 963 u_char irr2; 964 965 irr1 = inb(IO_ICU1); 966 irr2 = inb(IO_ICU2); 967 return ((irr2 << 8) | irr1); 968} 969#endif 970 971u_int basemem; 972 973static int 974add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, 975 int *physmap_idxp) 976{ 977 int i, insert_idx, physmap_idx; 978 979 physmap_idx = *physmap_idxp; 980 981 if (length == 0) 982 return (1); 983 984 /* 985 * Find insertion point while checking for overlap. Start off by 986 * assuming the new entry will be added to the end. 
987 * 988 * NB: physmap_idx points to the next free slot. 989 */ 990 insert_idx = physmap_idx; 991 for (i = 0; i <= physmap_idx; i += 2) { 992 if (base < physmap[i + 1]) { 993 if (base + length <= physmap[i]) { 994 insert_idx = i; 995 break; 996 } 997 if (boothowto & RB_VERBOSE) 998 printf( 999 "Overlapping memory regions, ignoring second region\n"); 1000 return (1); 1001 } 1002 } 1003 1004 /* See if we can prepend to the next entry. */ 1005 if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { 1006 physmap[insert_idx] = base; 1007 return (1); 1008 } 1009 1010 /* See if we can append to the previous entry. */ 1011 if (insert_idx > 0 && base == physmap[insert_idx - 1]) { 1012 physmap[insert_idx - 1] += length; 1013 return (1); 1014 } 1015 1016 physmap_idx += 2; 1017 *physmap_idxp = physmap_idx; 1018 if (physmap_idx == PHYSMAP_SIZE) { 1019 printf( 1020 "Too many segments in the physical address map, giving up\n"); 1021 return (0); 1022 } 1023 1024 /* 1025 * Move the last 'N' entries down to make room for the new 1026 * entry if needed. 1027 */ 1028 for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { 1029 physmap[i] = physmap[i - 2]; 1030 physmap[i + 1] = physmap[i - 1]; 1031 } 1032 1033 /* Insert the new entry. 
*/ 1034 physmap[insert_idx] = base; 1035 physmap[insert_idx + 1] = base + length; 1036 return (1); 1037} 1038 1039void 1040bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, 1041 vm_paddr_t *physmap, int *physmap_idx) 1042{ 1043 struct bios_smap *smap, *smapend; 1044 1045 smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); 1046 1047 for (smap = smapbase; smap < smapend; smap++) { 1048 if (boothowto & RB_VERBOSE) 1049 printf("SMAP type=%02x base=%016lx len=%016lx\n", 1050 smap->type, smap->base, smap->length); 1051 1052 if (smap->type != SMAP_TYPE_MEMORY) 1053 continue; 1054 1055 if (!add_physmap_entry(smap->base, smap->length, physmap, 1056 physmap_idx)) 1057 break; 1058 } 1059} 1060 1061#define efi_next_descriptor(ptr, size) \ 1062 ((struct efi_md *)(((uint8_t *) ptr) + size)) 1063 1064static void 1065add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, 1066 int *physmap_idx) 1067{ 1068 struct efi_md *map, *p; 1069 const char *type; 1070 size_t efisz; 1071 int ndesc, i; 1072 1073 static const char *types[] = { 1074 "Reserved", 1075 "LoaderCode", 1076 "LoaderData", 1077 "BootServicesCode", 1078 "BootServicesData", 1079 "RuntimeServicesCode", 1080 "RuntimeServicesData", 1081 "ConventionalMemory", 1082 "UnusableMemory", 1083 "ACPIReclaimMemory", 1084 "ACPIMemoryNVS", 1085 "MemoryMappedIO", 1086 "MemoryMappedIOPortSpace", 1087 "PalCode" 1088 }; 1089 1090 /* 1091 * Memory map data provided by UEFI via the GetMemoryMap 1092 * Boot Services API. 
1093 */ 1094 efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; 1095 map = (struct efi_md *)((uint8_t *)efihdr + efisz); 1096 1097 if (efihdr->descriptor_size == 0) 1098 return; 1099 ndesc = efihdr->memory_size / efihdr->descriptor_size; 1100 1101 if (boothowto & RB_VERBOSE) 1102 printf("%23s %12s %12s %8s %4s\n", 1103 "Type", "Physical", "Virtual", "#Pages", "Attr"); 1104 1105 for (i = 0, p = map; i < ndesc; i++, 1106 p = efi_next_descriptor(p, efihdr->descriptor_size)) { 1107 if (boothowto & RB_VERBOSE) { 1108 if (p->md_type <= EFI_MD_TYPE_PALCODE) 1109 type = types[p->md_type]; 1110 else 1111 type = "<INVALID>"; 1112 printf("%23s %012lx %12p %08lx ", type, p->md_phys, 1113 p->md_virt, p->md_pages); 1114 if (p->md_attr & EFI_MD_ATTR_UC) 1115 printf("UC "); 1116 if (p->md_attr & EFI_MD_ATTR_WC) 1117 printf("WC "); 1118 if (p->md_attr & EFI_MD_ATTR_WT) 1119 printf("WT "); 1120 if (p->md_attr & EFI_MD_ATTR_WB) 1121 printf("WB "); 1122 if (p->md_attr & EFI_MD_ATTR_UCE) 1123 printf("UCE "); 1124 if (p->md_attr & EFI_MD_ATTR_WP) 1125 printf("WP "); 1126 if (p->md_attr & EFI_MD_ATTR_RP) 1127 printf("RP "); 1128 if (p->md_attr & EFI_MD_ATTR_XP) 1129 printf("XP "); 1130 if (p->md_attr & EFI_MD_ATTR_RT) 1131 printf("RUNTIME"); 1132 printf("\n"); 1133 } 1134 1135 switch (p->md_type) { 1136 case EFI_MD_TYPE_CODE: 1137 case EFI_MD_TYPE_DATA: 1138 case EFI_MD_TYPE_BS_CODE: 1139 case EFI_MD_TYPE_BS_DATA: 1140 case EFI_MD_TYPE_FREE: 1141 /* 1142 * We're allowed to use any entry with these types. 
1143 */ 1144 break; 1145 default: 1146 continue; 1147 } 1148 1149 if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), 1150 physmap, physmap_idx)) 1151 break; 1152 } 1153} 1154 1155static char bootmethod[16] = ""; 1156SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, 1157 "System firmware boot method"); 1158 1159static void 1160native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) 1161{ 1162 struct bios_smap *smap; 1163 struct efi_map_header *efihdr; 1164 u_int32_t size; 1165 1166 /* 1167 * Memory map from INT 15:E820. 1168 * 1169 * subr_module.c says: 1170 * "Consumer may safely assume that size value precedes data." 1171 * ie: an int32_t immediately precedes smap. 1172 */ 1173 1174 efihdr = (struct efi_map_header *)preload_search_info(kmdp, 1175 MODINFO_METADATA | MODINFOMD_EFI_MAP); 1176 smap = (struct bios_smap *)preload_search_info(kmdp, 1177 MODINFO_METADATA | MODINFOMD_SMAP); 1178 if (efihdr == NULL && smap == NULL) 1179 panic("No BIOS smap or EFI map info from loader!"); 1180 1181 if (efihdr != NULL) { 1182 add_efi_map_entries(efihdr, physmap, physmap_idx); 1183 strlcpy(bootmethod, "UEFI", sizeof(bootmethod)); 1184 } else { 1185 size = *((u_int32_t *)smap - 1); 1186 bios_add_smap_entries(smap, size, physmap, physmap_idx); 1187 strlcpy(bootmethod, "BIOS", sizeof(bootmethod)); 1188 } 1189} 1190 1191#define PAGES_PER_GB (1024 * 1024 * 1024 / PAGE_SIZE) 1192 1193/* 1194 * Populate the (physmap) array with base/bound pairs describing the 1195 * available physical memory in the system, then test this memory and 1196 * build the phys_avail array describing the actually-available memory. 1197 * 1198 * Total memory size may be set by the kernel environment variable 1199 * hw.physmem or the compile-time define MAXMEM. 1200 * 1201 * XXX first should be vm_paddr_t. 
1202 */ 1203static void 1204getmemsize(caddr_t kmdp, u_int64_t first) 1205{ 1206 int i, physmap_idx, pa_indx, da_indx; 1207 vm_paddr_t pa, physmap[PHYSMAP_SIZE]; 1208 u_long physmem_start, physmem_tunable, memtest; 1209 pt_entry_t *pte; 1210 quad_t dcons_addr, dcons_size; 1211 int page_counter; 1212 1213 bzero(physmap, sizeof(physmap)); 1214 physmap_idx = 0; 1215 1216 init_ops.parse_memmap(kmdp, physmap, &physmap_idx); 1217 physmap_idx -= 2; 1218 1219 /* 1220 * Find the 'base memory' segment for SMP 1221 */ 1222 basemem = 0; 1223 for (i = 0; i <= physmap_idx; i += 2) { 1224 if (physmap[i] <= 0xA0000) { 1225 basemem = physmap[i + 1] / 1024; 1226 break; 1227 } 1228 } 1229 if (basemem == 0 || basemem > 640) { 1230 if (bootverbose) 1231 printf( 1232 "Memory map doesn't contain a basemem segment, faking it"); 1233 basemem = 640; 1234 } 1235 1236 /* 1237 * Make hole for "AP -> long mode" bootstrap code. The 1238 * mp_bootaddress vector is only available when the kernel 1239 * is configured to support APs and APs for the system start 1240 * in 32bit mode (e.g. SMP bare metal). 1241 */ 1242 if (init_ops.mp_bootaddress) { 1243 if (physmap[1] >= 0x100000000) 1244 panic( 1245 "Basemem segment is not suitable for AP bootstrap code!"); 1246 physmap[1] = init_ops.mp_bootaddress(physmap[1] / 1024); 1247 } 1248 1249 /* 1250 * Maxmem isn't the "maximum memory", it's one larger than the 1251 * highest page of the physical address space. It should be 1252 * called something like "Maxphyspage". We may adjust this 1253 * based on ``hw.physmem'' and the results of the memory test. 
1254 */ 1255 Maxmem = atop(physmap[physmap_idx + 1]); 1256 1257#ifdef MAXMEM 1258 Maxmem = MAXMEM / 4; 1259#endif 1260 1261 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) 1262 Maxmem = atop(physmem_tunable); 1263 1264 /* 1265 * The boot memory test is disabled by default, as it takes a 1266 * significant amount of time on large-memory systems, and is 1267 * unfriendly to virtual machines as it unnecessarily touches all 1268 * pages. 1269 * 1270 * A general name is used as the code may be extended to support 1271 * additional tests beyond the current "page present" test. 1272 */ 1273 memtest = 0; 1274 TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); 1275 1276 /* 1277 * Don't allow MAXMEM or hw.physmem to extend the amount of memory 1278 * in the system. 1279 */ 1280 if (Maxmem > atop(physmap[physmap_idx + 1])) 1281 Maxmem = atop(physmap[physmap_idx + 1]); 1282 1283 if (atop(physmap[physmap_idx + 1]) != Maxmem && 1284 (boothowto & RB_VERBOSE)) 1285 printf("Physical memory use set to %ldK\n", Maxmem * 4); 1286 1287 /* call pmap initialization to make new kernel address space */ 1288 pmap_bootstrap(&first); 1289 1290 /* 1291 * Size up each available chunk of physical memory. 1292 * 1293 * XXX Some BIOSes corrupt low 64KB between suspend and resume. 1294 * By default, mask off the first 16 pages unless we appear to be 1295 * running in a VM. 1296 */ 1297 physmem_start = (vm_guest > VM_GUEST_NO ? 
1 : 16) << PAGE_SHIFT; 1298 TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); 1299 if (physmap[0] < physmem_start) { 1300 if (physmem_start < PAGE_SIZE) 1301 physmap[0] = PAGE_SIZE; 1302 else if (physmem_start >= physmap[1]) 1303 physmap[0] = round_page(physmap[1] - PAGE_SIZE); 1304 else 1305 physmap[0] = round_page(physmem_start); 1306 } 1307 pa_indx = 0; 1308 da_indx = 1; 1309 phys_avail[pa_indx++] = physmap[0]; 1310 phys_avail[pa_indx] = physmap[0]; 1311 dump_avail[da_indx] = physmap[0]; 1312 pte = CMAP1; 1313 1314 /* 1315 * Get dcons buffer address 1316 */ 1317 if (getenv_quad("dcons.addr", &dcons_addr) == 0 || 1318 getenv_quad("dcons.size", &dcons_size) == 0) 1319 dcons_addr = 0; 1320 1321 /* 1322 * physmap is in bytes, so when converting to page boundaries, 1323 * round up the start address and round down the end address. 1324 */ 1325 page_counter = 0; 1326 if (memtest != 0) 1327 printf("Testing system memory"); 1328 for (i = 0; i <= physmap_idx; i += 2) { 1329 vm_paddr_t end; 1330 1331 end = ptoa((vm_paddr_t)Maxmem); 1332 if (physmap[i + 1] < end) 1333 end = trunc_page(physmap[i + 1]); 1334 for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { 1335 int tmp, page_bad, full; 1336 int *ptr = (int *)CADDR1; 1337 1338 full = FALSE; 1339 /* 1340 * block out kernel memory as not available. 1341 */ 1342 if (pa >= (vm_paddr_t)kernphys && pa < first) 1343 goto do_dump_avail; 1344 1345 /* 1346 * block out dcons buffer 1347 */ 1348 if (dcons_addr > 0 1349 && pa >= trunc_page(dcons_addr) 1350 && pa < dcons_addr + dcons_size) 1351 goto do_dump_avail; 1352 1353 page_bad = FALSE; 1354 if (memtest == 0) 1355 goto skip_memtest; 1356 1357 /* 1358 * Print a "." every GB to show we're making 1359 * progress. 
1360 */ 1361 page_counter++; 1362 if ((page_counter % PAGES_PER_GB) == 0) 1363 printf("."); 1364 1365 /* 1366 * map page into kernel: valid, read/write,non-cacheable 1367 */ 1368 *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD; 1369 invltlb(); 1370 1371 tmp = *(int *)ptr; 1372 /* 1373 * Test for alternating 1's and 0's 1374 */ 1375 *(volatile int *)ptr = 0xaaaaaaaa; 1376 if (*(volatile int *)ptr != 0xaaaaaaaa) 1377 page_bad = TRUE; 1378 /* 1379 * Test for alternating 0's and 1's 1380 */ 1381 *(volatile int *)ptr = 0x55555555; 1382 if (*(volatile int *)ptr != 0x55555555) 1383 page_bad = TRUE; 1384 /* 1385 * Test for all 1's 1386 */ 1387 *(volatile int *)ptr = 0xffffffff; 1388 if (*(volatile int *)ptr != 0xffffffff) 1389 page_bad = TRUE; 1390 /* 1391 * Test for all 0's 1392 */ 1393 *(volatile int *)ptr = 0x0; 1394 if (*(volatile int *)ptr != 0x0) 1395 page_bad = TRUE; 1396 /* 1397 * Restore original value. 1398 */ 1399 *(int *)ptr = tmp; 1400 1401skip_memtest: 1402 /* 1403 * Adjust array of valid/good pages. 1404 */ 1405 if (page_bad == TRUE) 1406 continue; 1407 /* 1408 * If this good page is a continuation of the 1409 * previous set of good pages, then just increase 1410 * the end pointer. Otherwise start a new chunk. 1411 * Note that "end" points one higher than end, 1412 * making the range >= start and < end. 1413 * If we're also doing a speculative memory 1414 * test and we at or past the end, bump up Maxmem 1415 * so that we keep going. The first bad page 1416 * will terminate the loop. 
1417 */ 1418 if (phys_avail[pa_indx] == pa) { 1419 phys_avail[pa_indx] += PAGE_SIZE; 1420 } else { 1421 pa_indx++; 1422 if (pa_indx == PHYS_AVAIL_ARRAY_END) { 1423 printf( 1424 "Too many holes in the physical address space, giving up\n"); 1425 pa_indx--; 1426 full = TRUE; 1427 goto do_dump_avail; 1428 } 1429 phys_avail[pa_indx++] = pa; /* start */ 1430 phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ 1431 } 1432 physmem++; 1433do_dump_avail: 1434 if (dump_avail[da_indx] == pa) { 1435 dump_avail[da_indx] += PAGE_SIZE; 1436 } else { 1437 da_indx++; 1438 if (da_indx == DUMP_AVAIL_ARRAY_END) { 1439 da_indx--; 1440 goto do_next; 1441 } 1442 dump_avail[da_indx++] = pa; /* start */ 1443 dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ 1444 } 1445do_next: 1446 if (full) 1447 break; 1448 } 1449 } 1450 *pte = 0; 1451 invltlb(); 1452 if (memtest != 0) 1453 printf("\n"); 1454 1455 /* 1456 * XXX 1457 * The last chunk must contain at least one page plus the message 1458 * buffer to avoid complicating other code (message buffer address 1459 * calculation, etc.). 1460 */ 1461 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1462 round_page(msgbufsize) >= phys_avail[pa_indx]) { 1463 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1464 phys_avail[pa_indx--] = 0; 1465 phys_avail[pa_indx--] = 0; 1466 } 1467 1468 Maxmem = atop(phys_avail[pa_indx]); 1469 1470 /* Trim off space for the message buffer. */ 1471 phys_avail[pa_indx] -= round_page(msgbufsize); 1472 1473 /* Map the message buffer. 
*/ 1474 msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]); 1475} 1476 1477static caddr_t 1478native_parse_preload_data(u_int64_t modulep) 1479{ 1480 caddr_t kmdp; 1481#ifdef DDB 1482 vm_offset_t ksym_start; 1483 vm_offset_t ksym_end; 1484#endif 1485 1486 preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); 1487 preload_bootstrap_relocate(KERNBASE); 1488 kmdp = preload_search_by_type("elf kernel"); 1489 if (kmdp == NULL) 1490 kmdp = preload_search_by_type("elf64 kernel"); 1491 boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); 1492 kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE; 1493#ifdef DDB 1494 ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); 1495 ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); 1496 db_fetch_ksymtab(ksym_start, ksym_end); 1497#endif 1498 1499 return (kmdp); 1500} 1501 1502u_int64_t 1503hammer_time(u_int64_t modulep, u_int64_t physfree) 1504{ 1505 caddr_t kmdp; 1506 int gsel_tss, x; 1507 struct pcpu *pc; 1508 struct nmi_pcpu *np; 1509 struct xstate_hdr *xhdr; 1510 u_int64_t msr; 1511 char *env; 1512 size_t kstack0_sz; 1513 1514 /* 1515 * This may be done better later if it gets more high level 1516 * components in it. If so just link td->td_proc here. 
1517 */ 1518 proc_linkup0(&proc0, &thread0); 1519 1520 kmdp = init_ops.parse_preload_data(modulep); 1521 1522 /* Init basic tunables, hz etc */ 1523 init_param1(); 1524 1525 thread0.td_kstack = physfree + KERNBASE; 1526 thread0.td_kstack_pages = kstack_pages; 1527 kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; 1528 bzero((void *)thread0.td_kstack, kstack0_sz); 1529 physfree += kstack0_sz; 1530 1531 /* 1532 * make gdt memory segments 1533 */ 1534 for (x = 0; x < NGDT; x++) { 1535 if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && 1536 x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) 1537 ssdtosd(&gdt_segs[x], &gdt[x]); 1538 } 1539 gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; 1540 ssdtosyssd(&gdt_segs[GPROC0_SEL], 1541 (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); 1542 1543 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 1544 r_gdt.rd_base = (long) gdt; 1545 lgdt(&r_gdt); 1546 pc = &__pcpu[0]; 1547 1548 wrmsr(MSR_FSBASE, 0); /* User value */ 1549 wrmsr(MSR_GSBASE, (u_int64_t)pc); 1550 wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ 1551 1552 pcpu_init(pc, 0, sizeof(struct pcpu)); 1553 dpcpu_init((void *)(physfree + KERNBASE), 0); 1554 physfree += DPCPU_SIZE; 1555 PCPU_SET(prvspace, pc); 1556 PCPU_SET(curthread, &thread0); 1557 PCPU_SET(tssp, &common_tss[0]); 1558 PCPU_SET(commontssp, &common_tss[0]); 1559 PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); 1560 PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); 1561 PCPU_SET(fs32p, &gdt[GUFS32_SEL]); 1562 PCPU_SET(gs32p, &gdt[GUGS32_SEL]); 1563 1564 /* 1565 * Initialize mutexes. 1566 * 1567 * icu_lock: in order to allow an interrupt to occur in a critical 1568 * section, to set pcpu->ipending (etc...) properly, we 1569 * must be able to get the icu lock, so it can't be 1570 * under witness. 
1571 */ 1572 mutex_init(); 1573 mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); 1574 mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); 1575 1576 /* exceptions */ 1577 for (x = 0; x < NIDT; x++) 1578 setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); 1579 setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); 1580 setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); 1581 setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); 1582 setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); 1583 setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); 1584 setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); 1585 setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); 1586 setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); 1587 setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); 1588 setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); 1589 setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); 1590 setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); 1591 setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); 1592 setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); 1593 setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); 1594 setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); 1595 setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); 1596 setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); 1597 setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); 1598#ifdef KDTRACE_HOOKS 1599 setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); 1600#endif 1601#ifdef XENHVM 1602 setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0); 1603#endif 1604 1605 r_idt.rd_limit = sizeof(idt0) - 1; 1606 r_idt.rd_base = (long) idt; 1607 lidt(&r_idt); 1608 1609 /* 1610 * Initialize the clock before the console so that console 1611 * initialization can use DELAY(). 1612 */ 1613 clock_init(); 1614 1615 /* 1616 * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4) 1617 * transition). 
1618 */ 1619 if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP) 1620 != NULL) 1621 vty_set_preferred(VTY_VT); 1622 1623 identify_cpu(); /* Final stage of CPU initialization */ 1624 initializecpu(); /* Initialize CPU registers */ 1625 initializecpucache(); 1626 1627 /* doublefault stack space, runs on ist1 */ 1628 common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; 1629 1630 /* 1631 * NMI stack, runs on ist2. The pcpu pointer is stored just 1632 * above the start of the ist2 stack. 1633 */ 1634 np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; 1635 np->np_pcpu = (register_t) pc; 1636 common_tss[0].tss_ist2 = (long) np; 1637 1638 /* Set the IO permission bitmap (empty due to tss seg limit) */ 1639 common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; 1640 1641 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 1642 ltr(gsel_tss); 1643 1644 /* Set up the fast syscall stuff */ 1645 msr = rdmsr(MSR_EFER) | EFER_SCE; 1646 wrmsr(MSR_EFER, msr); 1647 wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); 1648 wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); 1649 msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | 1650 ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); 1651 wrmsr(MSR_STAR, msr); 1652 wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); 1653 1654 getmemsize(kmdp, physfree); 1655 init_param2(physmem); 1656 1657 /* now running on new page tables, configured,and u/iom is accessible */ 1658 1659 cninit(); 1660 1661#ifdef DEV_ISA 1662#ifdef DEV_ATPIC 1663 elcr_probe(); 1664 atpic_startup(); 1665#else 1666 /* Reset and mask the atpics and leave them shut down. */ 1667 atpic_reset(); 1668 1669 /* 1670 * Point the ICU spurious interrupt vectors at the APIC spurious 1671 * interrupt handler. 
1672 */ 1673 setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); 1674 setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); 1675#endif 1676#else 1677#error "have you forgotten the isa device?"; 1678#endif 1679 1680 kdb_init(); 1681 1682#ifdef KDB 1683 if (boothowto & RB_KDB) 1684 kdb_enter(KDB_WHY_BOOTFLAGS, 1685 "Boot flags requested debugger"); 1686#endif 1687 1688 msgbufinit(msgbufp, msgbufsize); 1689 fpuinit(); 1690 1691 /* 1692 * Set up thread0 pcb after fpuinit calculated pcb + fpu save 1693 * area size. Zero out the extended state header in fpu save 1694 * area. 1695 */ 1696 thread0.td_pcb = get_pcb_td(&thread0); 1697 bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); 1698 if (use_xsave) { 1699 xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1700 1); 1701 xhdr->xstate_bv = xsave_mask; 1702 } 1703 /* make an initial tss so cpu can get interrupt stack on syscall! */ 1704 common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; 1705 /* Ensure the stack is aligned to 16 bytes */ 1706 common_tss[0].tss_rsp0 &= ~0xFul; 1707 PCPU_SET(rsp0, common_tss[0].tss_rsp0); 1708 PCPU_SET(curpcb, thread0.td_pcb); 1709 1710 /* transfer to user mode */ 1711 1712 _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); 1713 _udatasel = GSEL(GUDATA_SEL, SEL_UPL); 1714 _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); 1715 _ufssel = GSEL(GUFS32_SEL, SEL_UPL); 1716 _ugssel = GSEL(GUGS32_SEL, SEL_UPL); 1717 1718 load_ds(_udatasel); 1719 load_es(_udatasel); 1720 load_fs(_ufssel); 1721 1722 /* setup proc 0's pcb */ 1723 thread0.td_pcb->pcb_flags = 0; 1724 thread0.td_frame = &proc0_tf; 1725 1726 env = kern_getenv("kernelname"); 1727 if (env != NULL) 1728 strlcpy(kernelname, env, sizeof(kernelname)); 1729 1730 cpu_probe_amdc1e(); 1731 1732#ifdef FDT 1733 x86_init_fdt(); 1734#endif 1735 1736 /* Location of kernel stack for locore */ 1737 return ((u_int64_t)thread0.td_pcb); 1738} 1739 1740void 1741cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) 
1742{ 1743 1744 pcpu->pc_acpi_id = 0xffffffff; 1745} 1746 1747static int 1748smap_sysctl_handler(SYSCTL_HANDLER_ARGS) 1749{ 1750 struct bios_smap *smapbase; 1751 struct bios_smap_xattr smap; 1752 caddr_t kmdp; 1753 uint32_t *smapattr; 1754 int count, error, i; 1755 1756 /* Retrieve the system memory map from the loader. */ 1757 kmdp = preload_search_by_type("elf kernel"); 1758 if (kmdp == NULL) 1759 kmdp = preload_search_by_type("elf64 kernel"); 1760 smapbase = (struct bios_smap *)preload_search_info(kmdp, 1761 MODINFO_METADATA | MODINFOMD_SMAP); 1762 if (smapbase == NULL) 1763 return (0); 1764 smapattr = (uint32_t *)preload_search_info(kmdp, 1765 MODINFO_METADATA | MODINFOMD_SMAP_XATTR); 1766 count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase); 1767 error = 0; 1768 for (i = 0; i < count; i++) { 1769 smap.base = smapbase[i].base; 1770 smap.length = smapbase[i].length; 1771 smap.type = smapbase[i].type; 1772 if (smapattr != NULL) 1773 smap.xattr = smapattr[i]; 1774 else 1775 smap.xattr = 0; 1776 error = SYSCTL_OUT(req, &smap, sizeof(smap)); 1777 } 1778 return (error); 1779} 1780SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, 1781 smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); 1782 1783static int 1784efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS) 1785{ 1786 struct efi_map_header *efihdr; 1787 caddr_t kmdp; 1788 uint32_t efisize; 1789 1790 kmdp = preload_search_by_type("elf kernel"); 1791 if (kmdp == NULL) 1792 kmdp = preload_search_by_type("elf64 kernel"); 1793 efihdr = (struct efi_map_header *)preload_search_info(kmdp, 1794 MODINFO_METADATA | MODINFOMD_EFI_MAP); 1795 if (efihdr == NULL) 1796 return (0); 1797 efisize = *((uint32_t *)efihdr - 1); 1798 return (SYSCTL_OUT(req, efihdr, efisize)); 1799} 1800SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, 1801 efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map"); 1802 1803void 1804spinlock_enter(void) 1805{ 1806 struct thread *td; 1807 
register_t flags; 1808 1809 td = curthread; 1810 if (td->td_md.md_spinlock_count == 0) { 1811 flags = intr_disable(); 1812 td->td_md.md_spinlock_count = 1; 1813 td->td_md.md_saved_flags = flags; 1814 } else 1815 td->td_md.md_spinlock_count++; 1816 critical_enter(); 1817} 1818 1819void 1820spinlock_exit(void) 1821{ 1822 struct thread *td; 1823 register_t flags; 1824 1825 td = curthread; 1826 critical_exit(); 1827 flags = td->td_md.md_saved_flags; 1828 td->td_md.md_spinlock_count--; 1829 if (td->td_md.md_spinlock_count == 0) 1830 intr_restore(flags); 1831} 1832 1833/* 1834 * Construct a PCB from a trapframe. This is called from kdb_trap() where 1835 * we want to start a backtrace from the function that caused us to enter 1836 * the debugger. We have the context in the trapframe, but base the trace 1837 * on the PCB. The PCB doesn't have to be perfect, as long as it contains 1838 * enough for a backtrace. 1839 */ 1840void 1841makectx(struct trapframe *tf, struct pcb *pcb) 1842{ 1843 1844 pcb->pcb_r12 = tf->tf_r12; 1845 pcb->pcb_r13 = tf->tf_r13; 1846 pcb->pcb_r14 = tf->tf_r14; 1847 pcb->pcb_r15 = tf->tf_r15; 1848 pcb->pcb_rbp = tf->tf_rbp; 1849 pcb->pcb_rbx = tf->tf_rbx; 1850 pcb->pcb_rip = tf->tf_rip; 1851 pcb->pcb_rsp = tf->tf_rsp; 1852} 1853 1854int 1855ptrace_set_pc(struct thread *td, unsigned long addr) 1856{ 1857 1858 td->td_frame->tf_rip = addr; 1859 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 1860 return (0); 1861} 1862 1863int 1864ptrace_single_step(struct thread *td) 1865{ 1866 td->td_frame->tf_rflags |= PSL_T; 1867 return (0); 1868} 1869 1870int 1871ptrace_clear_single_step(struct thread *td) 1872{ 1873 td->td_frame->tf_rflags &= ~PSL_T; 1874 return (0); 1875} 1876 1877int 1878fill_regs(struct thread *td, struct reg *regs) 1879{ 1880 struct trapframe *tp; 1881 1882 tp = td->td_frame; 1883 return (fill_frame_regs(tp, regs)); 1884} 1885 1886int 1887fill_frame_regs(struct trapframe *tp, struct reg *regs) 1888{ 1889 regs->r_r15 = tp->tf_r15; 1890 regs->r_r14 = 
tp->tf_r14; 1891 regs->r_r13 = tp->tf_r13; 1892 regs->r_r12 = tp->tf_r12; 1893 regs->r_r11 = tp->tf_r11; 1894 regs->r_r10 = tp->tf_r10; 1895 regs->r_r9 = tp->tf_r9; 1896 regs->r_r8 = tp->tf_r8; 1897 regs->r_rdi = tp->tf_rdi; 1898 regs->r_rsi = tp->tf_rsi; 1899 regs->r_rbp = tp->tf_rbp; 1900 regs->r_rbx = tp->tf_rbx; 1901 regs->r_rdx = tp->tf_rdx; 1902 regs->r_rcx = tp->tf_rcx; 1903 regs->r_rax = tp->tf_rax; 1904 regs->r_rip = tp->tf_rip; 1905 regs->r_cs = tp->tf_cs; 1906 regs->r_rflags = tp->tf_rflags; 1907 regs->r_rsp = tp->tf_rsp; 1908 regs->r_ss = tp->tf_ss; 1909 if (tp->tf_flags & TF_HASSEGS) { 1910 regs->r_ds = tp->tf_ds; 1911 regs->r_es = tp->tf_es; 1912 regs->r_fs = tp->tf_fs; 1913 regs->r_gs = tp->tf_gs; 1914 } else { 1915 regs->r_ds = 0; 1916 regs->r_es = 0; 1917 regs->r_fs = 0; 1918 regs->r_gs = 0; 1919 } 1920 return (0); 1921} 1922 1923int 1924set_regs(struct thread *td, struct reg *regs) 1925{ 1926 struct trapframe *tp; 1927 register_t rflags; 1928 1929 tp = td->td_frame; 1930 rflags = regs->r_rflags & 0xffffffff; 1931 if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) 1932 return (EINVAL); 1933 tp->tf_r15 = regs->r_r15; 1934 tp->tf_r14 = regs->r_r14; 1935 tp->tf_r13 = regs->r_r13; 1936 tp->tf_r12 = regs->r_r12; 1937 tp->tf_r11 = regs->r_r11; 1938 tp->tf_r10 = regs->r_r10; 1939 tp->tf_r9 = regs->r_r9; 1940 tp->tf_r8 = regs->r_r8; 1941 tp->tf_rdi = regs->r_rdi; 1942 tp->tf_rsi = regs->r_rsi; 1943 tp->tf_rbp = regs->r_rbp; 1944 tp->tf_rbx = regs->r_rbx; 1945 tp->tf_rdx = regs->r_rdx; 1946 tp->tf_rcx = regs->r_rcx; 1947 tp->tf_rax = regs->r_rax; 1948 tp->tf_rip = regs->r_rip; 1949 tp->tf_cs = regs->r_cs; 1950 tp->tf_rflags = rflags; 1951 tp->tf_rsp = regs->r_rsp; 1952 tp->tf_ss = regs->r_ss; 1953 if (0) { /* XXXKIB */ 1954 tp->tf_ds = regs->r_ds; 1955 tp->tf_es = regs->r_es; 1956 tp->tf_fs = regs->r_fs; 1957 tp->tf_gs = regs->r_gs; 1958 tp->tf_flags = TF_HASSEGS; 1959 } 1960 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 1961 return (0); 1962} 
1963 1964/* XXX check all this stuff! */ 1965/* externalize from sv_xmm */ 1966static void 1967fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) 1968{ 1969 struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; 1970 struct envxmm *penv_xmm = &sv_xmm->sv_env; 1971 int i; 1972 1973 /* pcb -> fpregs */ 1974 bzero(fpregs, sizeof(*fpregs)); 1975 1976 /* FPU control/status */ 1977 penv_fpreg->en_cw = penv_xmm->en_cw; 1978 penv_fpreg->en_sw = penv_xmm->en_sw; 1979 penv_fpreg->en_tw = penv_xmm->en_tw; 1980 penv_fpreg->en_opcode = penv_xmm->en_opcode; 1981 penv_fpreg->en_rip = penv_xmm->en_rip; 1982 penv_fpreg->en_rdp = penv_xmm->en_rdp; 1983 penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; 1984 penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; 1985 1986 /* FPU registers */ 1987 for (i = 0; i < 8; ++i) 1988 bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); 1989 1990 /* SSE registers */ 1991 for (i = 0; i < 16; ++i) 1992 bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); 1993} 1994 1995/* internalize from fpregs into sv_xmm */ 1996static void 1997set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) 1998{ 1999 struct envxmm *penv_xmm = &sv_xmm->sv_env; 2000 struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; 2001 int i; 2002 2003 /* fpregs -> pcb */ 2004 /* FPU control/status */ 2005 penv_xmm->en_cw = penv_fpreg->en_cw; 2006 penv_xmm->en_sw = penv_fpreg->en_sw; 2007 penv_xmm->en_tw = penv_fpreg->en_tw; 2008 penv_xmm->en_opcode = penv_fpreg->en_opcode; 2009 penv_xmm->en_rip = penv_fpreg->en_rip; 2010 penv_xmm->en_rdp = penv_fpreg->en_rdp; 2011 penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; 2012 penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; 2013 2014 /* FPU registers */ 2015 for (i = 0; i < 8; ++i) 2016 bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); 2017 2018 /* SSE registers */ 2019 for (i = 0; i < 16; ++i) 2020 bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); 2021} 
2022 2023/* externalize from td->pcb */ 2024int 2025fill_fpregs(struct thread *td, struct fpreg *fpregs) 2026{ 2027 2028 KASSERT(td == curthread || TD_IS_SUSPENDED(td) || 2029 P_SHOULDSTOP(td->td_proc), 2030 ("not suspended thread %p", td)); 2031 fpugetregs(td); 2032 fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); 2033 return (0); 2034} 2035 2036/* internalize to td->pcb */ 2037int 2038set_fpregs(struct thread *td, struct fpreg *fpregs) 2039{ 2040 2041 set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); 2042 fpuuserinited(td); 2043 return (0); 2044} 2045 2046/* 2047 * Get machine context. 2048 */ 2049int 2050get_mcontext(struct thread *td, mcontext_t *mcp, int flags) 2051{ 2052 struct pcb *pcb; 2053 struct trapframe *tp; 2054 2055 pcb = td->td_pcb; 2056 tp = td->td_frame; 2057 PROC_LOCK(curthread->td_proc); 2058 mcp->mc_onstack = sigonstack(tp->tf_rsp); 2059 PROC_UNLOCK(curthread->td_proc); 2060 mcp->mc_r15 = tp->tf_r15; 2061 mcp->mc_r14 = tp->tf_r14; 2062 mcp->mc_r13 = tp->tf_r13; 2063 mcp->mc_r12 = tp->tf_r12; 2064 mcp->mc_r11 = tp->tf_r11; 2065 mcp->mc_r10 = tp->tf_r10; 2066 mcp->mc_r9 = tp->tf_r9; 2067 mcp->mc_r8 = tp->tf_r8; 2068 mcp->mc_rdi = tp->tf_rdi; 2069 mcp->mc_rsi = tp->tf_rsi; 2070 mcp->mc_rbp = tp->tf_rbp; 2071 mcp->mc_rbx = tp->tf_rbx; 2072 mcp->mc_rcx = tp->tf_rcx; 2073 mcp->mc_rflags = tp->tf_rflags; 2074 if (flags & GET_MC_CLEAR_RET) { 2075 mcp->mc_rax = 0; 2076 mcp->mc_rdx = 0; 2077 mcp->mc_rflags &= ~PSL_C; 2078 } else { 2079 mcp->mc_rax = tp->tf_rax; 2080 mcp->mc_rdx = tp->tf_rdx; 2081 } 2082 mcp->mc_rip = tp->tf_rip; 2083 mcp->mc_cs = tp->tf_cs; 2084 mcp->mc_rsp = tp->tf_rsp; 2085 mcp->mc_ss = tp->tf_ss; 2086 mcp->mc_ds = tp->tf_ds; 2087 mcp->mc_es = tp->tf_es; 2088 mcp->mc_fs = tp->tf_fs; 2089 mcp->mc_gs = tp->tf_gs; 2090 mcp->mc_flags = tp->tf_flags; 2091 mcp->mc_len = sizeof(*mcp); 2092 get_fpcontext(td, mcp, NULL, 0); 2093 mcp->mc_fsbase = pcb->pcb_fsbase; 2094 mcp->mc_gsbase = pcb->pcb_gsbase; 2095 mcp->mc_xfpustate = 0; 2096 
mcp->mc_xfpustate_len = 0; 2097 bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); 2098 return (0); 2099} 2100 2101/* 2102 * Set machine context. 2103 * 2104 * However, we don't set any but the user modifiable flags, and we won't 2105 * touch the cs selector. 2106 */ 2107int 2108set_mcontext(struct thread *td, mcontext_t *mcp) 2109{ 2110 struct pcb *pcb; 2111 struct trapframe *tp; 2112 char *xfpustate; 2113 long rflags; 2114 int ret; 2115 2116 pcb = td->td_pcb; 2117 tp = td->td_frame; 2118 if (mcp->mc_len != sizeof(*mcp) || 2119 (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) 2120 return (EINVAL); 2121 rflags = (mcp->mc_rflags & PSL_USERCHANGE) | 2122 (tp->tf_rflags & ~PSL_USERCHANGE); 2123 if (mcp->mc_flags & _MC_HASFPXSTATE) { 2124 if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - 2125 sizeof(struct savefpu)) 2126 return (EINVAL); 2127 xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); 2128 ret = copyin((void *)mcp->mc_xfpustate, xfpustate, 2129 mcp->mc_xfpustate_len); 2130 if (ret != 0) 2131 return (ret); 2132 } else 2133 xfpustate = NULL; 2134 ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); 2135 if (ret != 0) 2136 return (ret); 2137 tp->tf_r15 = mcp->mc_r15; 2138 tp->tf_r14 = mcp->mc_r14; 2139 tp->tf_r13 = mcp->mc_r13; 2140 tp->tf_r12 = mcp->mc_r12; 2141 tp->tf_r11 = mcp->mc_r11; 2142 tp->tf_r10 = mcp->mc_r10; 2143 tp->tf_r9 = mcp->mc_r9; 2144 tp->tf_r8 = mcp->mc_r8; 2145 tp->tf_rdi = mcp->mc_rdi; 2146 tp->tf_rsi = mcp->mc_rsi; 2147 tp->tf_rbp = mcp->mc_rbp; 2148 tp->tf_rbx = mcp->mc_rbx; 2149 tp->tf_rdx = mcp->mc_rdx; 2150 tp->tf_rcx = mcp->mc_rcx; 2151 tp->tf_rax = mcp->mc_rax; 2152 tp->tf_rip = mcp->mc_rip; 2153 tp->tf_rflags = rflags; 2154 tp->tf_rsp = mcp->mc_rsp; 2155 tp->tf_ss = mcp->mc_ss; 2156 tp->tf_flags = mcp->mc_flags; 2157 if (tp->tf_flags & TF_HASSEGS) { 2158 tp->tf_ds = mcp->mc_ds; 2159 tp->tf_es = mcp->mc_es; 2160 tp->tf_fs = mcp->mc_fs; 2161 tp->tf_gs = mcp->mc_gs; 2162 } 2163 if (mcp->mc_flags & _MC_HASBASES) { 2164 pcb->pcb_fsbase = 
mcp->mc_fsbase; 2165 pcb->pcb_gsbase = mcp->mc_gsbase; 2166 } 2167 set_pcb_flags(pcb, PCB_FULL_IRET); 2168 return (0); 2169} 2170 2171static void 2172get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, 2173 size_t xfpusave_len) 2174{ 2175 size_t max_len, len; 2176 2177 mcp->mc_ownedfp = fpugetregs(td); 2178 bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], 2179 sizeof(mcp->mc_fpstate)); 2180 mcp->mc_fpformat = fpuformat(); 2181 if (!use_xsave || xfpusave_len == 0) 2182 return; 2183 max_len = cpu_max_ext_state_size - sizeof(struct savefpu); 2184 len = xfpusave_len; 2185 if (len > max_len) { 2186 len = max_len; 2187 bzero(xfpusave + max_len, len - max_len); 2188 } 2189 mcp->mc_flags |= _MC_HASFPXSTATE; 2190 mcp->mc_xfpustate_len = len; 2191 bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); 2192} 2193 2194static int 2195set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, 2196 size_t xfpustate_len) 2197{ 2198 struct savefpu *fpstate; 2199 int error; 2200 2201 if (mcp->mc_fpformat == _MC_FPFMT_NODEV) 2202 return (0); 2203 else if (mcp->mc_fpformat != _MC_FPFMT_XMM) 2204 return (EINVAL); 2205 else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { 2206 /* We don't care what state is left in the FPU or PCB. */ 2207 fpstate_drop(td); 2208 error = 0; 2209 } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || 2210 mcp->mc_ownedfp == _MC_FPOWNED_PCB) { 2211 fpstate = (struct savefpu *)&mcp->mc_fpstate; 2212 fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; 2213 error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); 2214 } else 2215 return (EINVAL); 2216 return (error); 2217} 2218 2219void 2220fpstate_drop(struct thread *td) 2221{ 2222 2223 KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); 2224 critical_enter(); 2225 if (PCPU_GET(fpcurthread) == td) 2226 fpudrop(); 2227 /* 2228 * XXX force a full drop of the fpu. The above only drops it if we 2229 * owned it. 
2230 * 2231 * XXX I don't much like fpugetuserregs()'s semantics of doing a full 2232 * drop. Dropping only to the pcb matches fnsave's behaviour. 2233 * We only need to drop to !PCB_INITDONE in sendsig(). But 2234 * sendsig() is the only caller of fpugetuserregs()... perhaps we just 2235 * have too many layers. 2236 */ 2237 clear_pcb_flags(curthread->td_pcb, 2238 PCB_FPUINITDONE | PCB_USERFPUINITDONE); 2239 critical_exit(); 2240} 2241 2242int 2243fill_dbregs(struct thread *td, struct dbreg *dbregs) 2244{ 2245 struct pcb *pcb; 2246 2247 if (td == NULL) { 2248 dbregs->dr[0] = rdr0(); 2249 dbregs->dr[1] = rdr1(); 2250 dbregs->dr[2] = rdr2(); 2251 dbregs->dr[3] = rdr3(); 2252 dbregs->dr[6] = rdr6(); 2253 dbregs->dr[7] = rdr7(); 2254 } else { 2255 pcb = td->td_pcb; 2256 dbregs->dr[0] = pcb->pcb_dr0; 2257 dbregs->dr[1] = pcb->pcb_dr1; 2258 dbregs->dr[2] = pcb->pcb_dr2; 2259 dbregs->dr[3] = pcb->pcb_dr3; 2260 dbregs->dr[6] = pcb->pcb_dr6; 2261 dbregs->dr[7] = pcb->pcb_dr7; 2262 } 2263 dbregs->dr[4] = 0; 2264 dbregs->dr[5] = 0; 2265 dbregs->dr[8] = 0; 2266 dbregs->dr[9] = 0; 2267 dbregs->dr[10] = 0; 2268 dbregs->dr[11] = 0; 2269 dbregs->dr[12] = 0; 2270 dbregs->dr[13] = 0; 2271 dbregs->dr[14] = 0; 2272 dbregs->dr[15] = 0; 2273 return (0); 2274} 2275 2276int 2277set_dbregs(struct thread *td, struct dbreg *dbregs) 2278{ 2279 struct pcb *pcb; 2280 int i; 2281 2282 if (td == NULL) { 2283 load_dr0(dbregs->dr[0]); 2284 load_dr1(dbregs->dr[1]); 2285 load_dr2(dbregs->dr[2]); 2286 load_dr3(dbregs->dr[3]); 2287 load_dr6(dbregs->dr[6]); 2288 load_dr7(dbregs->dr[7]); 2289 } else { 2290 /* 2291 * Don't let an illegal value for dr7 get set. Specifically, 2292 * check for undefined settings. Setting these bit patterns 2293 * result in undefined behaviour and can lead to an unexpected 2294 * TRCTRAP or a general protection fault right here. 
2295 * Upper bits of dr6 and dr7 must not be set 2296 */ 2297 for (i = 0; i < 4; i++) { 2298 if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) 2299 return (EINVAL); 2300 if (td->td_frame->tf_cs == _ucode32sel && 2301 DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) 2302 return (EINVAL); 2303 } 2304 if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || 2305 (dbregs->dr[7] & 0xffffffff00000000ul) != 0) 2306 return (EINVAL); 2307 2308 pcb = td->td_pcb; 2309 2310 /* 2311 * Don't let a process set a breakpoint that is not within the 2312 * process's address space. If a process could do this, it 2313 * could halt the system by setting a breakpoint in the kernel 2314 * (if ddb was enabled). Thus, we need to check to make sure 2315 * that no breakpoints are being enabled for addresses outside 2316 * process's address space. 2317 * 2318 * XXX - what about when the watched area of the user's 2319 * address space is written into from within the kernel 2320 * ... wouldn't that still cause a breakpoint to be generated 2321 * from within kernel mode? 
2322 */ 2323 2324 if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { 2325 /* dr0 is enabled */ 2326 if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) 2327 return (EINVAL); 2328 } 2329 if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { 2330 /* dr1 is enabled */ 2331 if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) 2332 return (EINVAL); 2333 } 2334 if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { 2335 /* dr2 is enabled */ 2336 if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) 2337 return (EINVAL); 2338 } 2339 if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { 2340 /* dr3 is enabled */ 2341 if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) 2342 return (EINVAL); 2343 } 2344 2345 pcb->pcb_dr0 = dbregs->dr[0]; 2346 pcb->pcb_dr1 = dbregs->dr[1]; 2347 pcb->pcb_dr2 = dbregs->dr[2]; 2348 pcb->pcb_dr3 = dbregs->dr[3]; 2349 pcb->pcb_dr6 = dbregs->dr[6]; 2350 pcb->pcb_dr7 = dbregs->dr[7]; 2351 2352 set_pcb_flags(pcb, PCB_DBREGS); 2353 } 2354 2355 return (0); 2356} 2357 2358void 2359reset_dbregs(void) 2360{ 2361 2362 load_dr7(0); /* Turn off the control bits first */ 2363 load_dr0(0); 2364 load_dr1(0); 2365 load_dr2(0); 2366 load_dr3(0); 2367 load_dr6(0); 2368} 2369 2370/* 2371 * Return > 0 if a hardware breakpoint has been hit, and the 2372 * breakpoint was in user space. Return 0, otherwise. 
2373 */ 2374int 2375user_dbreg_trap(void) 2376{ 2377 u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ 2378 u_int64_t bp; /* breakpoint bits extracted from dr6 */ 2379 int nbp; /* number of breakpoints that triggered */ 2380 caddr_t addr[4]; /* breakpoint addresses */ 2381 int i; 2382 2383 dr7 = rdr7(); 2384 if ((dr7 & 0x000000ff) == 0) { 2385 /* 2386 * all GE and LE bits in the dr7 register are zero, 2387 * thus the trap couldn't have been caused by the 2388 * hardware debug registers 2389 */ 2390 return 0; 2391 } 2392 2393 nbp = 0; 2394 dr6 = rdr6(); 2395 bp = dr6 & 0x0000000f; 2396 2397 if (!bp) { 2398 /* 2399 * None of the breakpoint bits are set meaning this 2400 * trap was not caused by any of the debug registers 2401 */ 2402 return 0; 2403 } 2404 2405 /* 2406 * at least one of the breakpoints were hit, check to see 2407 * which ones and if any of them are user space addresses 2408 */ 2409 2410 if (bp & 0x01) { 2411 addr[nbp++] = (caddr_t)rdr0(); 2412 } 2413 if (bp & 0x02) { 2414 addr[nbp++] = (caddr_t)rdr1(); 2415 } 2416 if (bp & 0x04) { 2417 addr[nbp++] = (caddr_t)rdr2(); 2418 } 2419 if (bp & 0x08) { 2420 addr[nbp++] = (caddr_t)rdr3(); 2421 } 2422 2423 for (i = 0; i < nbp; i++) { 2424 if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { 2425 /* 2426 * addr[i] is in user space 2427 */ 2428 return nbp; 2429 } 2430 } 2431 2432 /* 2433 * None of the breakpoints are in user space. 2434 */ 2435 return 0; 2436} 2437 2438#ifdef KDB 2439 2440/* 2441 * Provide inb() and outb() as functions. They are normally only available as 2442 * inline functions, thus cannot be called from the debugger. 2443 */ 2444 2445/* silence compiler warnings */ 2446u_char inb_(u_short); 2447void outb_(u_short, u_char); 2448 2449u_char 2450inb_(u_short port) 2451{ 2452 return inb(port); 2453} 2454 2455void 2456outb_(u_short port, u_char data) 2457{ 2458 outb(port, data); 2459} 2460 2461#endif /* KDB */ 2462