vm_machdep.c revision 78342
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)vm_machdep.c  7.3 (Berkeley) 5/13/91
 *      Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * $FreeBSD: head/sys/powerpc/aim/vm_machdep.c 78342 2001-06-16 07:14:07Z benno $
 */
/*
 * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Chris G. Demetriou
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/md_var.h>

#include <dev/ofw/openfirm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

/*
 * Quick version of vm_fault: fault the page at v in simply by touching
 * it.  fubyte() forces a read fault; subyte() writes the byte back,
 * forcing a write fault (and the copy-on-write resolution) as well.
 */
int
vm_fault_quick(v, prot)
        caddr_t v;
        int prot;
{
        int r;

        if (prot & VM_PROT_WRITE)
                r = subyte(v, fubyte(v));
        else
                r = fubyte(v);
        return (r);
}

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(p1, p2, flags)
        register struct proc *p1, *p2;
        int flags;
{
        /* XXX: coming soon... */
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(p, func, arg)
        struct proc *p;
        void (*func) __P((void *));
        void *arg;
{
        /*
         * Note that the trap frame follows the args, so the function
         * is really called like this:  func(arg, frame);
         */
#if 0   /* XXX: no PowerPC pcb_context layout yet */
        p->p_addr->u_pcb.pcb_context[0] = (u_long) func;
        p->p_addr->u_pcb.pcb_context[2] = (u_long) arg;
#endif
}
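
/*
 * Illustration only (not compiled): a sketch of how MI kernel-thread
 * code is expected to use cpu_set_fork_handler() once cpu_fork() is
 * filled in.  fork1() creates the child, and the handler redirects the
 * child's first return from cpu_switch() into func(arg, frame) in
 * kernel mode.  The fork1() flags and the error handling here are
 * assumptions for the sketch, not a final interface.
 */
#if 0   /* example only */
static void
example_kthread_body(void *arg)
{
        /* Runs in kernel mode in the child; never returns to user space. */
        for (;;)
                tsleep(arg, PWAIT, "xkthr", 0);
}

static void
example_kthread_start(void)
{
        struct proc *p2;

        if (fork1(&proc0, RFMEM | RFFDG | RFPROC, &p2) == 0)
                cpu_set_fork_handler(p2, example_kthread_body, NULL);
}
#endif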

/*
 * cpu_exit is called as the last action during exit.
 * We release the address space of the process, block interrupts,
 * and call switch_exit.  switch_exit switches to proc0's PCB and stack,
 * then jumps into the middle of cpu_switch, as if it were switching
 * from proc0.
 */
void
cpu_exit(p)
        register struct proc *p;
{
        PROC_LOCK(p);
        mtx_lock_spin(&sched_lock);
        mtx_unlock_flags(&Giant, MTX_NOSWITCH);
        mtx_assert(&Giant, MA_NOTOWNED);

        /*
         * We have to wait until after releasing all locks before
         * changing p_stat.  If we block on a mutex then we will be
         * back at SRUN when we resume and our parent will never
         * harvest us.
         */
        p->p_stat = SZOMB;

        mp_fixme("assumption: p_pptr won't change at this time");
        wakeup(p->p_pptr);
        PROC_UNLOCK_NOSWITCH(p);

        cnt.v_swtch++;
        cpu_switch();
        panic("cpu_exit");
}

void
cpu_wait(p)
        struct proc *p;
{

        mtx_lock(&vm_mtx);
        /* drop per-process resources */
        pmap_dispose_proc(p);

        /* and clean out the vmspace */
        vmspace_free(p->p_vmspace);
        mtx_unlock(&vm_mtx);
}

/*
 * Dump the machine specific header information at the start of a core dump.
 */
int
cpu_coredump(p, vp, cred)
        struct proc *p;
        struct vnode *vp;
        struct ucred *cred;
{

        return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
            (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
            p));
}

#ifdef notyet
static void
setredzone(pte, vaddr)
        u_short *pte;
        caddr_t vaddr;
{
        /*
         * Eventually do this by setting up an expand-down stack segment
         * for the ss0: selector, allowing stack access down to the top
         * of u.  This means, though, that protection violations need to
         * be handled through a double fault exception that must do an
         * integral task switch to a known good context, within which a
         * dump can be taken.  A sensible scheme might be to save the
         * initial context used by sched (which has physical memory
         * mapped 1:1 at the bottom) and take the dump while still in
         * mapped mode.
         */
}
#endif
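
/*
 * Illustration only (not compiled): the parent's side of the handoff
 * that cpu_exit() and cpu_wait() above rely on.  The MI wait code
 * sleeps until the child has set SZOMB, reaps it, and only then calls
 * cpu_wait() to release the child's U-area and vmspace; this is why
 * cpu_exit() must not block between setting SZOMB and switching away.
 * Locking and status bookkeeping are elided; this is a simplified
 * sketch, not the actual wait1() code.
 */
#if 0   /* example only */
        while (p->p_stat != SZOMB)
                tsleep(p->p_pptr, PWAIT, "wait", 0); /* woken by cpu_exit() */
        /* ... collect exit status ... */
        cpu_wait(p);    /* safe: the child can no longer run */
#endif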

/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
        register struct buf *bp;
{
        register caddr_t addr, v, kva;
        vm_offset_t pa;

        if ((bp->b_flags & B_PHYS) == 0)
                panic("vmapbuf");

        mtx_lock(&vm_mtx);
        for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
            addr < bp->b_data + bp->b_bufsize;
            addr += PAGE_SIZE, v += PAGE_SIZE) {
                /*
                 * Do the vm_fault if needed; do the copy-on-write thing
                 * when reading stuff off device into memory.
                 */
                vm_fault_quick(addr,
                    (bp->b_iocmd == BIO_READ) ?
                    (VM_PROT_READ|VM_PROT_WRITE) : VM_PROT_READ);
                pa = trunc_page(pmap_kextract((vm_offset_t) addr));
                if (pa == 0)
                        panic("vmapbuf: page not present");
                vm_page_hold(PHYS_TO_VM_PAGE(pa));
                pmap_kenter((vm_offset_t) v, pa);
        }
        mtx_unlock(&vm_mtx);

        kva = bp->b_saveaddr;
        bp->b_saveaddr = bp->b_data;
        bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}

/*
 * Free the IO map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
        register struct buf *bp;
{
        register caddr_t addr;
        vm_offset_t pa;

        if ((bp->b_flags & B_PHYS) == 0)
                panic("vunmapbuf");

        mtx_lock(&vm_mtx);
        for (addr = (caddr_t)trunc_page(bp->b_data);
            addr < bp->b_data + bp->b_bufsize;
            addr += PAGE_SIZE) {
                pa = trunc_page(pmap_kextract((vm_offset_t) addr));
                pmap_kremove((vm_offset_t) addr);
                vm_page_unhold(PHYS_TO_VM_PAGE(pa));
        }
        mtx_unlock(&vm_mtx);

        bp->b_data = bp->b_saveaddr;
}
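
/*
 * Illustration only (not compiled): the usual vmapbuf()/vunmapbuf()
 * pairing in a physio-style raw I/O path.  The user buffer is faulted
 * in, wired, and remapped into kernel VA before the driver's strategy
 * routine runs, then unmapped after the I/O completes.  The strategy
 * pointer and the completion wait are simplified placeholders, not
 * real interfaces.
 */
#if 0   /* example only */
        bp->b_data = uio->uio_iov->iov_base;    /* user address */
        bp->b_bufsize = uio->uio_iov->iov_len;
        bp->b_flags |= B_PHYS;
        vmapbuf(bp);            /* wire pages, switch b_data to kernel VA */
        (*strategy)(bp);        /* driver DMAs to/from bp->b_data */
        /* ... wait for the I/O to complete ... */
        vunmapbuf(bp);          /* unwire pages, restore the user address */
        bp->b_flags &= ~B_PHYS;
#endif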

/*
 * Reset back to firmware.
 */
void
cpu_reset()
{
        OF_exit();
}

int
grow_stack(p, sp)
        struct proc *p;
        size_t sp;
{
        int rv;

        rv = vm_map_growstack(p, sp);
        if (rv != KERN_SUCCESS)
                return (0);

        return (1);
}

static int cnt_prezero;

SYSCTL_INT(_machdep, OID_AUTO, cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");

/*
 * Implement the pre-zeroed page mechanism.
 * This routine is called from the idle loop.
 */

#define ZIDLE_LO(v)     ((v) * 2 / 3)
#define ZIDLE_HI(v)     ((v) * 4 / 5)

int
vm_page_zero_idle()
{
        static int free_rover;
        static int zero_state;
        vm_page_t m;
        int s;

        /*
         * Attempt to maintain roughly 2/3 to 4/5 of our free pages in a
         * PG_ZERO'd state (see ZIDLE_LO/ZIDLE_HI).  The hysteresis keeps
         * us from zeroing a page every time one is allocated when the
         * system is near steady-state; otherwise we might get 'flutter'
         * during disk I/O / IPC or fast sleeps.  We also do not want to
         * be continuously zeroing pages because doing so may flush our
         * L1 and L2 caches too much.
         */

        if (mtx_trylock(&vm_mtx) == 0)
                return (0);
        if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) {
                mtx_unlock(&vm_mtx);
                return (0);
        }
        if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) {
                mtx_unlock(&vm_mtx);
                return (0);
        }

        s = splvm();
        m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
        zero_state = 0;
        if (m != NULL && (m->flags & PG_ZERO) == 0) {
                vm_page_queues[m->queue].lcnt--;
                TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq);
                m->queue = PQ_NONE;
                splx(s);
#if 0
                rel_mplock();
#endif
                pmap_zero_page(VM_PAGE_TO_PHYS(m));
#if 0
                get_mplock();
#endif
                (void)splvm();
                vm_page_flag_set(m, PG_ZERO);
                m->queue = PQ_FREE + m->pc;
                vm_page_queues[m->queue].lcnt++;
                TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m,
                    pageq);
                ++vm_page_zero_count;
                ++cnt_prezero;
                if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
                        zero_state = 1;
        }
        free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
        splx(s);
        mtx_unlock(&vm_mtx);
        return (1);
}

/*
 * Software interrupt handler for queued VM system processing.
 */
void
swi_vm(void *dummy)
{
#if 0   /* XXX: Don't have busdma stuff yet */
        if (busdma_swi_pending != 0)
                busdma_swi();
#endif
}

/*
 * Tell whether this address is in some physical memory region.
 * Currently used by the kernel coredump code in order to avoid
 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
 * or other unpredictable behaviour.
 */
int
is_physical_memory(addr)
        vm_offset_t addr;
{
        /*
         * Stuff other tests for known memory-mapped devices (PCI?)
         * here.
         */

        return 1;
}
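
/*
 * Illustration only (not compiled): a fuller check would walk the
 * physical memory regions the port obtains from Open Firmware and
 * reject anything outside them.  The mem_regions() accessor and the
 * struct mem_region fields used below are assumptions for this sketch,
 * not an interface this file provides.
 */
#if 0   /* example only */
static int
is_physical_memory_strict(vm_offset_t addr)
{
        struct mem_region *mp;
        int i, cnt;

        mem_regions(&mp, &cnt);         /* hypothetical accessor */
        for (i = 0; i < cnt; i++)
                if (addr >= mp[i].mr_start &&
                    addr < mp[i].mr_start + mp[i].mr_size)
                        return (1);
        return (0);
}
#endif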