/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * $FreeBSD: head/sys/powerpc/aim/vm_machdep.c 78342 2001-06-16 07:14:07Z benno $
 */
/*
 * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Chris G. Demetriou
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/mutex.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/md_var.h>

#include <dev/ofw/openfirm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

/*
 * Quick version of vm_fault.  Fault the page in by touching it: for a
 * write fault, fetch a byte from the user address and store it back;
 * for a read fault, just fetch a byte.  fubyte() and subyte() return
 * -1 on failure.
 */
int
vm_fault_quick(v, prot)
	caddr_t v;
	int prot;
{
	int r;

	if (prot & VM_PROT_WRITE)
		r = subyte(v, fubyte(v));
	else
		r = fubyte(v);
	return(r);
}
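
#if 0
/*
 * Illustrative sketch only (hypothetical helper, not part of this file's
 * interface): prefault each page of a user buffer the way vmapbuf()
 * below does before wiring its pages.
 */
static void
example_prefault(caddr_t uva, size_t len)
{
	caddr_t va;

	for (va = (caddr_t)trunc_page((vm_offset_t)uva); va < uva + len;
	    va += PAGE_SIZE)
		vm_fault_quick(va, VM_PROT_READ);
}
#endif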

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(p1, p2, flags)
	register struct proc *p1, *p2;
	int flags;
{
	/* XXX: coming soon... */
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(p, func, arg)
	struct proc *p;
	void (*func) __P((void *));
	void *arg;
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
#if 0 /* XXX */
	p->p_addr->u_pcb.pcb_context[0] = (u_long) func;
	p->p_addr->u_pcb.pcb_context[2] = (u_long) arg;
#endif
}
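
#if 0
/*
 * Illustrative sketch only: the kind of function that gets installed via
 * cpu_set_fork_handler() (e.g. by kernel thread creation).  It is invoked
 * with the registered argument; the child's trap frame follows it, as
 * noted above.  The names below are hypothetical.
 */
static void
example_kthread_main(void *arg)
{
	struct example_softc *sc = arg;	/* hypothetical driver state */

	for (;;) {
		/* ... perform the thread's periodic work on sc ... */
		tsleep(sc, PWAIT, "exmpl", hz);
	}
}
#endif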

/*
 * cpu_exit is called as the last action during exit.
 * We drop Giant, mark the process a zombie under the scheduler lock,
 * wake up our parent so that it can harvest us, and then switch away
 * for the last time via cpu_switch(), which never returns here.
 */
void
cpu_exit(p)
	register struct proc *p;
{
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	mtx_unlock_flags(&Giant, MTX_NOSWITCH);
	mtx_assert(&Giant, MA_NOTOWNED);

	/*
	 * We have to wait until after releasing all locks before
	 * changing p_stat.  If we block on a mutex then we will be
	 * back at SRUN when we resume and our parent will never
	 * harvest us.
	 */
	p->p_stat = SZOMB;

	mp_fixme("assumption: p_pptr won't change at this time");
	wakeup(p->p_pptr);
	PROC_UNLOCK_NOSWITCH(p);

	cnt.v_swtch++;
	cpu_switch();
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{

	mtx_lock(&vm_mtx);
	/* drop per-process resources */
	pmap_dispose_proc(p);

	/* and clean out the vmspace */
	vmspace_free(p->p_vmspace);
	mtx_unlock(&vm_mtx);
}

/*
 * Dump the machine-specific header information at the start of a core dump.
 */
int
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}
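
/*
 * For reference, with illustrative numbers: ctob() converts pages to
 * bytes, so assuming UPAGES == 2 and a 4 KB page size, the header
 * written above covers 8 KB, i.e. the u-area (struct user) holding
 * the pcb and the per-process kernel stack.
 */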

#ifdef notyet
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
	/*
	 * Eventually do this by setting up an expand-down stack segment
	 * for the ss0: selector, allowing stack access down to the top
	 * of the u-area.  This means, though, that protection violations
	 * need to be handled through a double-fault exception that must
	 * do an integral task switch to a known-good context, within
	 * which a dump can be taken.  A sensible scheme might be to save
	 * the initial context used by sched (which has physical memory
	 * mapped 1:1 at the bottom) and take the dump while still in
	 * mapped mode.  (Note: this describes the i386 approach; it does
	 * not apply as-is here.)
	 */
}
#endif

/*
 * Map an I/O request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr, v, kva;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	mtx_lock(&vm_mtx);
	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE, v += PAGE_SIZE) {
		/*
		 * Do the vm_fault if needed; do the copy-on-write thing
		 * when reading stuff off device into memory.
		 */
		vm_fault_quick(addr, (bp->b_iocmd == BIO_READ) ?
		    (VM_PROT_READ|VM_PROT_WRITE) : VM_PROT_READ);
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		if (pa == 0)
			panic("vmapbuf: page not present");
		vm_page_hold(PHYS_TO_VM_PAGE(pa));
		pmap_kenter((vm_offset_t) v, pa);
	}
	mtx_unlock(&vm_mtx);

	kva = bp->b_saveaddr;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}

/*
 * Free the I/O map PTEs associated with this I/O operation.
 * We also invalidate the TLB entries and restore the original b_data.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	mtx_lock(&vm_mtx);
	for (addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE) {
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		pmap_kremove((vm_offset_t) addr);
		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
	}
	mtx_unlock(&vm_mtx);

	bp->b_data = bp->b_saveaddr;
}
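
#if 0
/*
 * Illustrative sketch only: the usual pairing of vmapbuf()/vunmapbuf()
 * around a raw I/O transfer, roughly what physio() does.  The transfer
 * step itself is elided; the function name is hypothetical.
 */
static void
example_raw_io(struct buf *bp)
{
	bp->b_flags |= B_PHYS;
	vmapbuf(bp);	/* wire user pages, remap them into kernel VA */
	/* ... hand bp to the driver's strategy routine and wait ... */
	vunmapbuf(bp);	/* unwire pages, restore the original b_data */
}
#endif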

/*
 * Reset back to firmware.
 */
void
cpu_reset()
{
	OF_exit();
}

int
grow_stack(p, sp)
	struct proc *p;
	size_t sp;
{
	int rv;

	rv = vm_map_growstack(p, sp);
	if (rv != KERN_SUCCESS)
		return (0);

	return (1);
}
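
#if 0
/*
 * Illustrative sketch only: how a page-fault path might use grow_stack()
 * before retrying a faulting user stack access.  This is a sketch, not
 * the real trap code; the function name and signal delivery are assumed
 * context.
 */
static void
example_stack_fault(struct proc *p, vm_offset_t va)
{
	if (!grow_stack(p, (size_t)va))
		trapsignal(p, SIGSEGV, 0);	/* assumed signal delivery */
}
#endif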

static int cnt_prezero;

SYSCTL_INT(_machdep, OID_AUTO, cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");

/*
 * Implement the pre-zeroed page mechanism.
 * This routine is called from the idle loop.
 */

#define ZIDLE_LO(v)    ((v) * 2 / 3)
#define ZIDLE_HI(v)    ((v) * 4 / 5)
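
/*
 * Worked example with hypothetical numbers: if cnt.v_free_count is 30000,
 * ZIDLE_LO yields 20000 and ZIDLE_HI yields 24000, so the idle loop starts
 * zeroing once fewer than 20000 free pages are pre-zeroed and stops once
 * 24000 are, providing the hysteresis described below.
 */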

int
vm_page_zero_idle()
{
	static int free_rover;
	static int zero_state;
	vm_page_t m;
	int s;

	/*
	 * Attempt to maintain approximately 1/2 of our free pages in a
	 * PG_ZERO'd state.  Add some hysteresis to (attempt to) avoid
	 * generally zeroing a page when the system is near steady-state.
	 * Otherwise we might get 'flutter' during disk I/O / IPC or
	 * fast sleeps.  We also do not want to be continuously zeroing
	 * pages because doing so may flush our L1 and L2 caches too much.
	 */

	if (mtx_trylock(&vm_mtx) == 0)
		return (0);
	if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) {
		mtx_unlock(&vm_mtx);
		return (0);
	}
	if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) {
		mtx_unlock(&vm_mtx);
		return (0);
	}

	s = splvm();
	m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
	zero_state = 0;
	if (m != NULL && (m->flags & PG_ZERO) == 0) {
		vm_page_queues[m->queue].lcnt--;
		TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq);
		m->queue = PQ_NONE;
		splx(s);
#if 0
		rel_mplock();
#endif
		pmap_zero_page(VM_PAGE_TO_PHYS(m));
#if 0
		get_mplock();
#endif
		(void)splvm();
		vm_page_flag_set(m, PG_ZERO);
		m->queue = PQ_FREE + m->pc;
		vm_page_queues[m->queue].lcnt++;
		TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m,
		    pageq);
		++vm_page_zero_count;
		++cnt_prezero;
		if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
			zero_state = 1;
	}
	free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
	splx(s);
	mtx_unlock(&vm_mtx);
	return (1);
}
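
#if 0
/*
 * Illustrative sketch only: a machine-dependent idle loop would call
 * vm_page_zero_idle() while nothing else is runnable, stopping once it
 * reports no work.  "nothing_else_runnable" is a hypothetical predicate,
 * not a real kernel interface.
 */
	while (nothing_else_runnable() && vm_page_zero_idle() != 0)
		continue;
#endif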

/*
 * Software interrupt handler for queued VM system processing.
 */
void
swi_vm(void *dummy)
{
#if 0 /* XXX: Don't have busdma stuff yet */
	if (busdma_swi_pending != 0)
		busdma_swi();
#endif
}

/*
 * Tell whether this address is in some physical memory region.
 * Currently used by the kernel coredump code in order to avoid
 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
 * or other unpredictable behaviour.
 */
int
is_physical_memory(addr)
	vm_offset_t addr;
{
	/*
	 * Add tests for other known memory-mapped devices (PCI?)
	 * here.
	 */

	return 1;
}
424