/* vm_machdep.c revision 60041 */
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * $FreeBSD: head/sys/powerpc/aim/vm_machdep.c 60041 2000-05-05 09:59:14Z phk $
 */
/*
 * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Chris G. Demetriou
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
5892555Sdes * 5992555Sdes * Carnegie Mellon requests users of this software to return to 6092555Sdes * 6192555Sdes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 6292555Sdes * School of Computer Science 6392555Sdes * Carnegie Mellon University 64124208Sdes * Pittsburgh PA 15213-3890 65124208Sdes * 6692555Sdes * any improvements or extensions that they make and grant Carnegie the 6792555Sdes * rights to redistribute these changes. 6892555Sdes */ 6992555Sdes 7092555Sdes#include <sys/param.h> 7192555Sdes#include <sys/systm.h> 7292555Sdes#include <sys/proc.h> 7392555Sdes#include <sys/malloc.h> 7492555Sdes#include <sys/bio.h> 7560573Skris#include <sys/buf.h> 7660573Skris#include <sys/vnode.h> 7760573Skris#include <sys/vmmeter.h> 7860573Skris#include <sys/kernel.h> 7960573Skris#include <sys/sysctl.h> 8060573Skris#include <sys/unistd.h> 8160573Skris 8260573Skris#include <machine/clock.h> 8360573Skris#include <machine/cpu.h> 8460573Skris#include <machine/fpu.h> 8560573Skris#include <machine/md_var.h> 8660573Skris#include <machine/prom.h> 8760573Skris 8860573Skris#include <vm/vm.h> 8960573Skris#include <vm/vm_param.h> 9060573Skris#include <sys/lock.h> 9160573Skris#include <vm/vm_kern.h> 9260573Skris#include <vm/vm_page.h> 9360573Skris#include <vm/vm_map.h> 9469587Sgreen#include <vm/vm_extern.h> 9576259Sgreen 9669587Sgreen#include <sys/user.h> 9769587Sgreen 9869587Sgreen/* 9976259Sgreen * quick version of vm_fault 10069587Sgreen */ 101221420Sdesint 102221420Sdesvm_fault_quick(v, prot) 103221420Sdes caddr_t v; 104221420Sdes int prot; 10560573Skris{ 10660573Skris int r; 10760573Skris if (prot & VM_PROT_WRITE) 10860573Skris r = subyte(v, fubyte(v)); 10960573Skris else 11060573Skris r = fubyte(v); 11160573Skris return(r); 11260573Skris} 11360573Skris 11460573Skris/* 11560573Skris * Finish a fork operation, with process p2 nearly set up. 
11660573Skris * Copy and update the pcb, set up the stack so that the child 11760573Skris * ready to run and return to user mode. 118192595Sdes */ 119192595Sdesvoid 120192595Sdescpu_fork(p1, p2, flags) 121192595Sdes register struct proc *p1, *p2; 122192595Sdes int flags; 123192595Sdes{ 12460573Skris struct user *up = p2->p_addr; 12560573Skris 12660573Skris if ((flags & RFPROC) == 0) 12760573Skris return; 12860573Skris 12960573Skris p2->p_md.md_tf = p1->p_md.md_tf; 13060573Skris p2->p_md.md_flags = p1->p_md.md_flags & (MDP_FPUSED | MDP_UAC_MASK); 13160573Skris 13260573Skris /* 13360573Skris * Cache the physical address of the pcb, so we can 13460573Skris * swap to it easily. 13560573Skris */ 13660573Skris p2->p_md.md_pcbpaddr = (void*) vtophys((vm_offset_t) &up->u_pcb); 13760573Skris 13860573Skris /* 13960573Skris * Copy floating point state from the FP chip to the PCB 14060573Skris * if this process has state stored there. 14160573Skris */ 14260573Skris alpha_fpstate_save(p1, 0); 14360573Skris 14460573Skris /* 14560573Skris * Copy pcb and stack from proc p1 to p2. We do this as 14660573Skris * cheaply as possible, copying only the active part of the 14760573Skris * stack. The stack and pcb need to agree. Make sure that the 14860573Skris * new process has FEN disabled. 14960573Skris */ 15060573Skris p2->p_addr->u_pcb = p1->p_addr->u_pcb; 15161209Skris p2->p_addr->u_pcb.pcb_hw.apcb_usp = alpha_pal_rdusp(); 15260573Skris p2->p_addr->u_pcb.pcb_hw.apcb_flags &= ~ALPHA_PCB_FLAGS_FEN; 15360573Skris 15460573Skris /* 15560573Skris * Set the floating point state. 
15660573Skris */ 15760573Skris if ((p2->p_addr->u_pcb.pcb_fp_control & IEEE_INHERIT) == 0) { 15860573Skris p2->p_addr->u_pcb.pcb_fp_control = 0; 15961209Skris p2->p_addr->u_pcb.pcb_fp.fpr_cr = (FPCR_DYN_NORMAL 16061209Skris | FPCR_INVD | FPCR_DZED 16161209Skris | FPCR_OVFD | FPCR_INED 16261209Skris | FPCR_UNFD); 16360573Skris } 16460573Skris 16560573Skris /* 16660573Skris * Arrange for a non-local goto when the new process 16760573Skris * is started, to resume here, returning nonzero from setjmp. 16860573Skris */ 16960573Skris#ifdef DIAGNOSTIC 17060573Skris if (p1 != curproc) 17160573Skris panic("cpu_fork: curproc"); 172192595Sdes alpha_fpstate_check(p1); 173204917Sdes#endif 174204917Sdes 175204917Sdes /* 176204917Sdes * create the child's kernel stack, from scratch. 177204917Sdes */ 178204917Sdes { 179204917Sdes struct trapframe *p2tf; 180204917Sdes 181204917Sdes /* 182204917Sdes * Pick a stack pointer, leaving room for a trapframe; 183 * copy trapframe from parent so return to user mode 184 * will be to right address, with correct registers. 185 */ 186 p2tf = p2->p_md.md_tf = (struct trapframe *) 187 ((char *)p2->p_addr + USPACE - sizeof(struct trapframe)); 188 bcopy(p1->p_md.md_tf, p2->p_md.md_tf, 189 sizeof(struct trapframe)); 190 191 /* 192 * Set up return-value registers as fork() libc stub expects. 193 */ 194 p2tf->tf_regs[FRAME_V0] = 0; /* child's pid (linux) */ 195 p2tf->tf_regs[FRAME_A3] = 0; /* no error */ 196 p2tf->tf_regs[FRAME_A4] = 1; /* is child (FreeBSD) */ 197 198 /* 199 * Arrange for continuation at child_return(), which 200 * will return to exception_return(). Note that the child 201 * process doesn't stay in the kernel for long! 202 * 203 * This is an inlined version of cpu_set_kpc. 
204 */ 205 up->u_pcb.pcb_hw.apcb_ksp = (u_int64_t)p2tf; 206 up->u_pcb.pcb_context[0] = 207 (u_int64_t)child_return; /* s0: pc */ 208 up->u_pcb.pcb_context[1] = 209 (u_int64_t)exception_return; /* s1: ra */ 210 up->u_pcb.pcb_context[2] = (u_long) p2; /* s2: a0 */ 211 up->u_pcb.pcb_context[7] = 212 (u_int64_t)switch_trampoline; /* ra: assembly magic */ 213 } 214} 215 216/* 217 * Intercept the return address from a freshly forked process that has NOT 218 * been scheduled yet. 219 * 220 * This is needed to make kernel threads stay in kernel mode. 221 */ 222void 223cpu_set_fork_handler(p, func, arg) 224 struct proc *p; 225 void (*func) __P((void *)); 226 void *arg; 227{ 228 /* 229 * Note that the trap frame follows the args, so the function 230 * is really called like this: func(arg, frame); 231 */ 232 p->p_addr->u_pcb.pcb_context[0] = (u_long) func; 233 p->p_addr->u_pcb.pcb_context[2] = (u_long) arg; 234} 235 236/* 237 * cpu_exit is called as the last action during exit. 238 * We release the address space of the process, block interrupts, 239 * and call switch_exit. switch_exit switches to proc0's PCB and stack, 240 * then jumps into the middle of cpu_switch, as if it were switching 241 * from proc0. 242 */ 243void 244cpu_exit(p) 245 register struct proc *p; 246{ 247 alpha_fpstate_drop(p); 248 249 (void) splhigh(); 250 cnt.v_swtch++; 251 cpu_switch(p); 252 panic("cpu_exit"); 253} 254 255void 256cpu_wait(p) 257 struct proc *p; 258{ 259 /* drop per-process resources */ 260 pmap_dispose_proc(p); 261 262 /* and clean-out the vmspace */ 263 vmspace_free(p->p_vmspace); 264} 265 266/* 267 * Dump the machine specific header information at the start of a core dump. 
268 */ 269int 270cpu_coredump(p, vp, cred) 271 struct proc *p; 272 struct vnode *vp; 273 struct ucred *cred; 274{ 275 276 return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES), 277 (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, 278 p)); 279} 280 281#ifdef notyet 282static void 283setredzone(pte, vaddr) 284 u_short *pte; 285 caddr_t vaddr; 286{ 287/* eventually do this by setting up an expand-down stack segment 288 for ss0: selector, allowing stack access down to top of u. 289 this means though that protection violations need to be handled 290 thru a double fault exception that must do an integral task 291 switch to a known good context, within which a dump can be 292 taken. a sensible scheme might be to save the initial context 293 used by sched (that has physical memory mapped 1:1 at bottom) 294 and take the dump while still in mapped mode */ 295} 296#endif 297 298/* 299 * Map an IO request into kernel virtual address space. 300 * 301 * All requests are (re)mapped into kernel VA space. 302 * Notice that we use b_bufsize for the size of the buffer 303 * to be mapped. b_bcount might be modified by the driver. 304 */ 305void 306vmapbuf(bp) 307 register struct buf *bp; 308{ 309 register caddr_t addr, v, kva; 310 vm_offset_t pa; 311 312 if ((bp->b_flags & B_PHYS) == 0) 313 panic("vmapbuf"); 314 315 for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data); 316 addr < bp->b_data + bp->b_bufsize; 317 addr += PAGE_SIZE, v += PAGE_SIZE) { 318 /* 319 * Do the vm_fault if needed; do the copy-on-write thing 320 * when reading stuff off device into memory. 
321 */ 322 vm_fault_quick(addr, 323 (bp->b_iocmd == BIO_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ); 324 pa = trunc_page(pmap_kextract((vm_offset_t) addr)); 325 if (pa == 0) 326 panic("vmapbuf: page not present"); 327 vm_page_hold(PHYS_TO_VM_PAGE(pa)); 328 pmap_kenter((vm_offset_t) v, pa); 329 } 330 331 kva = bp->b_saveaddr; 332 bp->b_saveaddr = bp->b_data; 333 bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK); 334} 335 336/* 337 * Free the io map PTEs associated with this IO operation. 338 * We also invalidate the TLB entries and restore the original b_addr. 339 */ 340void 341vunmapbuf(bp) 342 register struct buf *bp; 343{ 344 register caddr_t addr; 345 vm_offset_t pa; 346 347 if ((bp->b_flags & B_PHYS) == 0) 348 panic("vunmapbuf"); 349 350 for (addr = (caddr_t)trunc_page(bp->b_data); 351 addr < bp->b_data + bp->b_bufsize; 352 addr += PAGE_SIZE) { 353 pa = trunc_page(pmap_kextract((vm_offset_t) addr)); 354 pmap_kremove((vm_offset_t) addr); 355 vm_page_unhold(PHYS_TO_VM_PAGE(pa)); 356 } 357 358 bp->b_data = bp->b_saveaddr; 359} 360 361/* 362 * Force reset the processor by invalidating the entire address space! 363 */ 364void 365cpu_reset() 366{ 367 prom_halt(0); 368} 369 370int 371grow_stack(p, sp) 372 struct proc *p; 373 size_t sp; 374{ 375 int rv; 376 377 rv = vm_map_growstack (p, sp); 378 if (rv != KERN_SUCCESS) 379 return (0); 380 381 return (1); 382} 383 384 385static int cnt_prezero; 386 387SYSCTL_INT(_machdep, OID_AUTO, cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, ""); 388 389/* 390 * Implement the pre-zeroed page mechanism. 391 * This routine is called from the idle loop. 392 */ 393 394#define ZIDLE_LO(v) ((v) * 2 / 3) 395#define ZIDLE_HI(v) ((v) * 4 / 5) 396 397int 398vm_page_zero_idle() 399{ 400 static int free_rover; 401 static int zero_state; 402 vm_page_t m; 403 int s; 404 405 /* 406 * Attempt to maintain approximately 1/2 of our free pages in a 407 * PG_ZERO'd state. 
Add some hysteresis to (attempt to) avoid 408 * generally zeroing a page when the system is near steady-state. 409 * Otherwise we might get 'flutter' during disk I/O / IPC or 410 * fast sleeps. We also do not want to be continuously zeroing 411 * pages because doing so may flush our L1 and L2 caches too much. 412 */ 413 414 if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) 415 return(0); 416 if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) 417 return(0); 418 419#ifdef SMP 420 if (try_mplock()) { 421#endif 422 s = splvm(); 423 m = vm_page_list_find(PQ_FREE, free_rover, FALSE); 424 zero_state = 0; 425 if (m != NULL && (m->flags & PG_ZERO) == 0) { 426 vm_page_queues[m->queue].lcnt--; 427 TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq); 428 m->queue = PQ_NONE; 429 splx(s); 430#if 0 431 rel_mplock(); 432#endif 433 pmap_zero_page(VM_PAGE_TO_PHYS(m)); 434#if 0 435 get_mplock(); 436#endif 437 (void)splvm(); 438 vm_page_flag_set(m, PG_ZERO); 439 m->queue = PQ_FREE + m->pc; 440 vm_page_queues[m->queue].lcnt++; 441 TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, 442 pageq); 443 ++vm_page_zero_count; 444 ++cnt_prezero; 445 if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) 446 zero_state = 1; 447 } 448 free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK; 449 splx(s); 450#ifdef SMP 451 rel_mplock(); 452#endif 453 return (1); 454#ifdef SMP 455 } 456#endif 457 return (0); 458} 459 460/* 461 * Software interrupt handler for queued VM system processing. 462 */ 463void 464swi_vm() 465{ 466#if 0 467 if (busdma_swi_pending != 0) 468 busdma_swi(); 469#endif 470} 471 472/* 473 * Tell whether this address is in some physical memory region. 474 * Currently used by the kernel coredump code in order to avoid 475 * dumping the ``ISA memory hole'' which could cause indefinite hangs, 476 * or other unpredictable behaviour. 
477 */ 478 479 480int 481is_physical_memory(addr) 482 vm_offset_t addr; 483{ 484 /* 485 * stuff other tests for known memory-mapped devices (PCI?) 486 * here 487 */ 488 489 return 1; 490} 491