vm_machdep.c revision 129750
1285SN/A/*- 2477SN/A * Copyright (c) 1982, 1986 The Regents of the University of California. 3285SN/A * Copyright (c) 1989, 1990 William Jolitz 4285SN/A * Copyright (c) 1994 John Dyson 5285SN/A * All rights reserved. 6285SN/A * 7285SN/A * This code is derived from software contributed to Berkeley by 8285SN/A * the Systems Programming Group of the University of Utah Computer 9285SN/A * Science Department, and William Jolitz. 10285SN/A * 11285SN/A * Redistribution and use in source and binary forms, with or without 12285SN/A * modification, are permitted provided that the following conditions 13285SN/A * are met: 14285SN/A * 1. Redistributions of source code must retain the above copyright 15285SN/A * notice, this list of conditions and the following disclaimer. 16285SN/A * 2. Redistributions in binary form must reproduce the above copyright 17285SN/A * notice, this list of conditions and the following disclaimer in the 18285SN/A * documentation and/or other materials provided with the distribution. 19285SN/A * 3. All advertising materials mentioning features or use of this software 20285SN/A * must display the following acknowledgement: 21285SN/A * This product includes software developed by the University of 22285SN/A * California, Berkeley and its contributors. 23285SN/A * 4. Neither the name of the University nor the names of its contributors 24285SN/A * may be used to endorse or promote products derived from this software 25285SN/A * without specific prior written permission. 26285SN/A * 27285SN/A * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28285SN/A * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29780Salanb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30285SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31285SN/A * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32285SN/A * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33285SN/A * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34477SN/A * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35285SN/A * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36285SN/A * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37285SN/A * SUCH DAMAGE. 38285SN/A * 39285SN/A * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 40285SN/A * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ 41285SN/A */ 42285SN/A 43285SN/A#include <sys/cdefs.h> 44285SN/A__FBSDID("$FreeBSD: head/sys/amd64/amd64/vm_machdep.c 129750 2004-05-26 12:09:39Z tmm $"); 45285SN/A 46285SN/A#include "opt_isa.h" 47285SN/A#include "opt_cpu.h" 48285SN/A 49285SN/A#include <sys/param.h> 50285SN/A#include <sys/systm.h> 51285SN/A#include <sys/bio.h> 52285SN/A#include <sys/buf.h> 53285SN/A#include <sys/kse.h> 54285SN/A#include <sys/kernel.h> 55285SN/A#include <sys/ktr.h> 56285SN/A#include <sys/lock.h> 57285SN/A#include <sys/malloc.h> 58285SN/A#include <sys/mbuf.h> 59285SN/A#include <sys/mutex.h> 60285SN/A#include <sys/proc.h> 61285SN/A#include <sys/sf_buf.h> 62285SN/A#include <sys/smp.h> 63285SN/A#include <sys/sysctl.h> 64285SN/A#include <sys/unistd.h> 65285SN/A#include <sys/user.h> 66285SN/A#include <sys/vnode.h> 67285SN/A#include <sys/vmmeter.h> 68285SN/A 69285SN/A#include <machine/cpu.h> 70285SN/A#include <machine/md_var.h> 71285SN/A#include <machine/pcb.h> 72285SN/A 73285SN/A#include <vm/vm.h> 74285SN/A#include <vm/vm_extern.h> 75285SN/A#include <vm/vm_kern.h> 76285SN/A#include <vm/vm_page.h> 77285SN/A#include <vm/vm_map.h> 78285SN/A#include <vm/vm_param.h> 79285SN/A 80285SN/A#include <amd64/isa/isa.h> 81285SN/A 82285SN/Astatic void cpu_reset_real(void); 83285SN/A#ifdef SMP 84285SN/Astatic void cpu_reset_proxy(void); 85285SN/Astatic u_int cpu_reset_proxyid; 86285SN/Astatic volatile u_int cpu_reset_proxy_active; 87285SN/A#endif 88285SN/A 89285SN/A/* 90285SN/A * Finish a fork operation, with process p2 nearly set up. 91285SN/A * Copy and update the pcb, set up the stack so that the child 92285SN/A * ready to run and return to user mode. 93285SN/A */ 94285SN/Avoid 95285SN/Acpu_fork(td1, p2, td2, flags) 96285SN/A register struct thread *td1; 97285SN/A register struct proc *p2; 98285SN/A struct thread *td2; 99285SN/A int flags; 100285SN/A{ 101285SN/A register struct proc *p1; 102285SN/A struct pcb *pcb2; 103285SN/A struct mdproc *mdp2; 104285SN/A 105285SN/A p1 = td1->td_proc; 106285SN/A if ((flags & RFPROC) == 0) 107285SN/A return; 108285SN/A 109285SN/A /* Ensure that p1's pcb is up to date. */ 110285SN/A fpuexit(td1); 111285SN/A 112285SN/A /* Point the pcb to the top of the stack */ 113285SN/A pcb2 = (struct pcb *)(td2->td_kstack + 114285SN/A td2->td_kstack_pages * PAGE_SIZE) - 1; 115285SN/A td2->td_pcb = pcb2; 116285SN/A 117285SN/A /* Copy p1's pcb */ 118285SN/A bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); 119285SN/A 120285SN/A /* Point mdproc and then copy over td1's contents */ 121285SN/A mdp2 = &p2->p_md; 122285SN/A bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); 123285SN/A 124285SN/A /* 125285SN/A * Create a new fresh stack for the new process. 126285SN/A * Copy the trap frame for the return to user mode as if from a 127285SN/A * syscall. This copies most of the user mode register values. 128285SN/A */ 129285SN/A td2->td_frame = (struct trapframe *)td2->td_pcb - 1; 130285SN/A bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); 131285SN/A 132285SN/A td2->td_frame->tf_rax = 0; /* Child returns zero */ 133285SN/A td2->td_frame->tf_rflags &= ~PSL_C; /* success */ 134285SN/A td2->td_frame->tf_rdx = 1; 135285SN/A 136285SN/A /* 137285SN/A * Set registers for trampoline to user mode. Leave space for the 138285SN/A * return address on stack. These are the kernel mode register values. 139285SN/A */ 140285SN/A pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pml4); 141285SN/A pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */ 142285SN/A pcb2->pcb_rbp = 0; 143285SN/A pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *); 144285SN/A pcb2->pcb_rbx = (register_t)td2; /* fork_trampoline argument */ 145285SN/A pcb2->pcb_rip = (register_t)fork_trampoline; 146285SN/A pcb2->pcb_rflags = td2->td_frame->tf_rflags & ~PSL_I; /* ints disabled */ 147285SN/A /*- 148285SN/A * pcb2->pcb_dr*: cloned above. 149285SN/A * pcb2->pcb_savefpu: cloned above. 150285SN/A * pcb2->pcb_flags: cloned above. 151285SN/A * pcb2->pcb_onfault: cloned above (always NULL here?). 152285SN/A * pcb2->pcb_[fg]sbase: cloned above 153285SN/A */ 154285SN/A 155285SN/A /* 156285SN/A * Now, cpu_switch() can schedule the new process. 157285SN/A * pcb_rsp is loaded pointing to the cpu_switch() stack frame 158285SN/A * containing the return address when exiting cpu_switch. 159285SN/A * This will normally be to fork_trampoline(), which will have 160285SN/A * %ebx loaded with the new proc's pointer. fork_trampoline() 161285SN/A * will set up a stack to call fork_return(p, frame); to complete 162285SN/A * the return to user-mode. 163285SN/A */ 164285SN/A} 165285SN/A 166285SN/A/* 167285SN/A * Intercept the return address from a freshly forked process that has NOT 168285SN/A * been scheduled yet. 169285SN/A * 170285SN/A * This is needed to make kernel threads stay in kernel mode. 171285SN/A */ 172285SN/Avoid 173285SN/Acpu_set_fork_handler(td, func, arg) 174285SN/A struct thread *td; 175285SN/A void (*func)(void *); 176285SN/A void *arg; 177285SN/A{ 178285SN/A /* 179285SN/A * Note that the trap frame follows the args, so the function 180285SN/A * is really called like this: func(arg, frame); 181285SN/A */ 182285SN/A td->td_pcb->pcb_r12 = (long) func; /* function */ 183285SN/A td->td_pcb->pcb_rbx = (long) arg; /* first arg */ 184285SN/A} 185285SN/A 186285SN/Avoid 187285SN/Acpu_exit(struct thread *td) 188285SN/A{ 189285SN/A struct pcb *pcb = td->td_pcb; 190285SN/A 191285SN/A if (pcb->pcb_flags & PCB_DBREGS) { 192285SN/A /* disable all hardware breakpoints */ 193285SN/A reset_dbregs(); 194285SN/A pcb->pcb_flags &= ~PCB_DBREGS; 195285SN/A } 196285SN/A} 197285SN/A 198285SN/Avoid 199285SN/Acpu_thread_exit(struct thread *td) 200285SN/A{ 201285SN/A struct pcb *pcb = td->td_pcb; 202285SN/A 203285SN/A if (td == PCPU_GET(fpcurthread)) 204285SN/A fpudrop(); 205285SN/A if (pcb->pcb_flags & PCB_DBREGS) { 206285SN/A /* disable all hardware breakpoints */ 207285SN/A reset_dbregs(); 208285SN/A pcb->pcb_flags &= ~PCB_DBREGS; 209285SN/A } 210285SN/A} 211285SN/A 212285SN/Avoid 213285SN/Acpu_thread_clean(struct thread *td) 214285SN/A{ 215285SN/A} 216285SN/A 217285SN/Avoid 218285SN/Acpu_thread_swapin(struct thread *td) 219285SN/A{ 220285SN/A} 221285SN/A 222285SN/Avoid 223285SN/Acpu_thread_swapout(struct thread *td) 224285SN/A{ 225285SN/A} 226285SN/A 227285SN/Avoid 228285SN/Acpu_thread_setup(struct thread *td) 229285SN/A{ 230285SN/A 231285SN/A td->td_pcb = (struct pcb *)(td->td_kstack + 232285SN/A td->td_kstack_pages * PAGE_SIZE) - 1; 233285SN/A td->td_frame = (struct trapframe *)td->td_pcb - 1; 234285SN/A} 235285SN/A 236285SN/A/* 237285SN/A * Initialize machine state (pcb and trap frame) for a new thread about to 238285SN/A * upcall. Pu t enough state in the new thread's PCB to get it to go back 239285SN/A * userret(), where we can intercept it again to set the return (upcall) 240285SN/A * Address and stack, along with those from upcals that are from other sources 241285SN/A * such as those generated in thread_userret() itself. 242285SN/A */ 243285SN/Avoid 244285SN/Acpu_set_upcall(struct thread *td, struct thread *td0) 245285SN/A{ 246285SN/A struct pcb *pcb2; 247285SN/A 248285SN/A /* Point the pcb to the top of the stack. */ 249285SN/A pcb2 = td->td_pcb; 250285SN/A 251285SN/A /* 252285SN/A * Copy the upcall pcb. This loads kernel regs. 253285SN/A * Those not loaded individually below get their default 254285SN/A * values here. 255285SN/A * 256285SN/A * XXXKSE It might be a good idea to simply skip this as 257285SN/A * the values of the other registers may be unimportant. 258285SN/A * This would remove any requirement for knowing the KSE 259285SN/A * at this time (see the matching comment below for 260285SN/A * more analysis) (need a good safe default). 261285SN/A */ 262285SN/A bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); 263285SN/A pcb2->pcb_flags &= ~PCB_FPUINITDONE; 264285SN/A 265285SN/A /* 266285SN/A * Create a new fresh stack for the new thread. 267285SN/A * Don't forget to set this stack value into whatever supplies 268285SN/A * the address for the fault handlers. 269285SN/A * The contexts are filled in at the time we actually DO the 270285SN/A * upcall as only then do we know which KSE we got. 271285SN/A */ 272285SN/A bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); 273285SN/A 274285SN/A /* 275285SN/A * Set registers for trampoline to user mode. Leave space for the 276285SN/A * return address on stack. These are the kernel mode register values. 277285SN/A */ 278285SN/A pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pml4); 279285SN/A pcb2->pcb_r12 = (register_t)fork_return; /* trampoline arg */ 280285SN/A pcb2->pcb_rbp = 0; 281285SN/A pcb2->pcb_rsp = (register_t)td->td_frame - sizeof(void *); /* trampoline arg */ 282285SN/A pcb2->pcb_rbx = (register_t)td; /* trampoline arg */ 283285SN/A pcb2->pcb_rip = (register_t)fork_trampoline; 284285SN/A pcb2->pcb_rflags = PSL_KERNEL; /* ints disabled */ 285285SN/A /* 286285SN/A * If we didn't copy the pcb, we'd need to do the following registers: 287285SN/A * pcb2->pcb_dr*: cloned above. 288285SN/A * pcb2->pcb_savefpu: cloned above. 289285SN/A * pcb2->pcb_rflags: cloned above. 290285SN/A * pcb2->pcb_onfault: cloned above (always NULL here?). 291285SN/A * pcb2->pcb_[fg]sbase: cloned above 292285SN/A */ 293285SN/A} 294285SN/A 295285SN/A/* 296285SN/A * Set that machine state for performing an upcall that has to 297285SN/A * be done in thread_userret() so that those upcalls generated 298285SN/A * in thread_userret() itself can be done as well. 299285SN/A */ 300285SN/Avoid 301285SN/Acpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) 302285SN/A{ 303285SN/A 304285SN/A /* 305285SN/A * Do any extra cleaning that needs to be done. 306285SN/A * The thread may have optional components 307285SN/A * that are not present in a fresh thread. 308285SN/A * This may be a recycled thread so make it look 309285SN/A * as though it's newly allocated. 310285SN/A */ 311285SN/A cpu_thread_clean(td); 312285SN/A 313285SN/A /* 314285SN/A * Set the trap frame to point at the beginning of the uts 315285SN/A * function. 316285SN/A */ 317285SN/A td->td_frame->tf_rsp = 318285SN/A ((register_t)ku->ku_stack.ss_sp + ku->ku_stack.ss_size) & ~0x0f; 319285SN/A td->td_frame->tf_rsp -= 8; 320285SN/A td->td_frame->tf_rip = (register_t)ku->ku_func; 321285SN/A 322285SN/A /* 323285SN/A * Pass the address of the mailbox for this kse to the uts 324285SN/A * function as a parameter on the stack. 325285SN/A */ 326285SN/A td->td_frame->tf_rdi = (register_t)ku->ku_mailbox; 327285SN/A} 328285SN/A 329285SN/A 330285SN/A/* 331285SN/A * Force reset the processor by invalidating the entire address space! 332285SN/A */ 333285SN/A 334285SN/A#ifdef SMP 335285SN/Astatic void 336285SN/Acpu_reset_proxy() 337285SN/A{ 338285SN/A 339285SN/A cpu_reset_proxy_active = 1; 340285SN/A while (cpu_reset_proxy_active == 1) 341285SN/A ; /* Wait for other cpu to see that we've started */ 342285SN/A stop_cpus((1<<cpu_reset_proxyid)); 343285SN/A printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); 344285SN/A DELAY(1000000); 345285SN/A cpu_reset_real(); 346285SN/A} 347285SN/A#endif 348285SN/A 349285SN/Avoid 350285SN/Acpu_reset() 351285SN/A{ 352285SN/A#ifdef SMP 353285SN/A if (smp_active == 0) { 354285SN/A cpu_reset_real(); 355285SN/A /* NOTREACHED */ 356285SN/A } else { 357285SN/A 358285SN/A u_int map; 359285SN/A int cnt; 360285SN/A printf("cpu_reset called on cpu#%d\n", PCPU_GET(cpuid)); 361285SN/A 362285SN/A map = PCPU_GET(other_cpus) & ~ stopped_cpus; 363285SN/A 364285SN/A if (map != 0) { 365285SN/A printf("cpu_reset: Stopping other CPUs\n"); 366285SN/A stop_cpus(map); /* Stop all other CPUs */ 367285SN/A } 368285SN/A 369285SN/A if (PCPU_GET(cpuid) == 0) { 370285SN/A DELAY(1000000); 371285SN/A cpu_reset_real(); 372285SN/A /* NOTREACHED */ 373285SN/A } else { 374285SN/A /* We are not BSP (CPU #0) */ 375285SN/A 376285SN/A cpu_reset_proxyid = PCPU_GET(cpuid); 377285SN/A cpustop_restartfunc = cpu_reset_proxy; 378285SN/A cpu_reset_proxy_active = 0; 379285SN/A printf("cpu_reset: Restarting BSP\n"); 380285SN/A started_cpus = (1<<0); /* Restart CPU #0 */ 381285SN/A 382285SN/A cnt = 0; 383285SN/A while (cpu_reset_proxy_active == 0 && cnt < 10000000) 384285SN/A cnt++; /* Wait for BSP to announce restart */ 385285SN/A if (cpu_reset_proxy_active == 0) 386285SN/A printf("cpu_reset: Failed to restart BSP\n"); 387285SN/A enable_intr(); 388285SN/A cpu_reset_proxy_active = 2; 389285SN/A 390285SN/A while (1); 391285SN/A /* NOTREACHED */ 392285SN/A } 393285SN/A } 394285SN/A#else 395285SN/A cpu_reset_real(); 396285SN/A#endif 397285SN/A} 398285SN/A 399285SN/Astatic void 400285SN/Acpu_reset_real() 401285SN/A{ 402285SN/A 403285SN/A /* 404285SN/A * Attempt to do a CPU reset via the keyboard controller, 405285SN/A * do not turn of the GateA20, as any machine that fails 406285SN/A * to do the reset here would then end up in no man's land. 407285SN/A */ 408285SN/A 409285SN/A outb(IO_KBD + 4, 0xFE); 410285SN/A DELAY(500000); /* wait 0.5 sec to see if that did it */ 411285SN/A printf("Keyboard reset did not work, attempting CPU shutdown\n"); 412285SN/A DELAY(1000000); /* wait 1 sec for printf to complete */ 413285SN/A /* force a shutdown by unmapping entire address space ! */ 414285SN/A bzero((caddr_t)PML4map, PAGE_SIZE); 415285SN/A 416285SN/A /* "good night, sweet prince .... <THUNK!>" */ 417285SN/A invltlb(); 418285SN/A /* NOTREACHED */ 419285SN/A while(1); 420285SN/A} 421285SN/A 422/* 423 * Allocate an sf_buf for the given vm_page. On this machine, however, there 424 * is no sf_buf object. Instead, an opaque pointer to the given vm_page is 425 * returned. 426 */ 427struct sf_buf * 428sf_buf_alloc(struct vm_page *m, int pri) 429{ 430 431 return ((struct sf_buf *)m); 432} 433 434/* 435 * Free the sf_buf. In fact, do nothing because there are no resources 436 * associated with the sf_buf. 437 */ 438void 439sf_buf_free(struct sf_buf *sf) 440{ 441} 442 443/* 444 * Software interrupt handler for queued VM system processing. 445 */ 446void 447swi_vm(void *dummy) 448{ 449 if (busdma_swi_pending != 0) 450 busdma_swi(); 451} 452 453/* 454 * Tell whether this address is in some physical memory region. 455 * Currently used by the kernel coredump code in order to avoid 456 * dumping the ``ISA memory hole'' which could cause indefinite hangs, 457 * or other unpredictable behaviour. 458 */ 459 460int 461is_physical_memory(vm_paddr_t addr) 462{ 463 464#ifdef DEV_ISA 465 /* The ISA ``memory hole''. */ 466 if (addr >= 0xa0000 && addr < 0x100000) 467 return 0; 468#endif 469 470 /* 471 * stuff other tests for known memory-mapped devices (PCI?) 472 * here 473 */ 474 475 return 1; 476} 477