vm_machdep.c revision 25557
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
40 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41 *	$Id: vm_machdep.c,v 1.79 1997/04/16 12:11:37 kato Exp $
42 */
43
44#include "npx.h"
45#include "opt_bounce.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/proc.h>
50#include <sys/malloc.h>
51#include <sys/buf.h>
52#include <sys/vnode.h>
53#include <sys/vmmeter.h>
54
55#include <machine/clock.h>
56#include <machine/cpu.h>
57#include <machine/reg.h>
58#include <machine/md_var.h>
59#include <machine/npx.h>
60#ifdef SMP
61#include <machine/smp.h>
62#endif
63
64#include <vm/vm.h>
65#include <vm/vm_param.h>
66#include <vm/vm_prot.h>
67#include <sys/lock.h>
68#include <vm/vm_kern.h>
69#include <vm/vm_page.h>
70#include <vm/vm_map.h>
71#include <vm/vm_extern.h>
72
73#include <sys/user.h>
74
75#ifdef PC98
76#include <pc98/pc98/pc98.h>
77#else
78#include <i386/isa/isa.h>
79#endif
80
81#ifdef SMP
82extern struct proc *SMPnpxproc[];
83#define npxproc (SMPnpxproc[cpunumber()])
84#else
85extern struct proc *npxproc;
86#endif
87
88#ifdef BOUNCE_BUFFERS
89static vm_offset_t
90		vm_bounce_kva __P((int size, int waitok));
91static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
92					int now));
93static vm_offset_t
94		vm_bounce_page_find __P((int count));
95static void	vm_bounce_page_free __P((vm_offset_t pa, int count));
96
97static volatile int	kvasfreecnt;
98
99caddr_t		bouncememory;
100int		bouncepages;
101static int	bpwait;
102static vm_offset_t	*bouncepa;
103static int		bmwait, bmfreeing;
104
105#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
106static int		bounceallocarraysize;
107static unsigned	*bounceallocarray;
108static int		bouncefree;
109
110#if defined(PC98) && defined (EPSON_BOUNCEDMA)
111#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
112#else
113#define SIXTEENMEG (4096*4096)
114#endif
115#define MAXBKVA 1024
116int		maxbkva = MAXBKVA*PAGE_SIZE;
117
118/* special list that can be used at interrupt time for eventual kva free */
119static struct kvasfree {
120	vm_offset_t addr;
121	vm_offset_t size;
122} kvaf[MAXBKVA];
123
124/*
125 * get bounce buffer pages (count physically contiguous)
126 * (only 1 implemented now)
127 */
128static vm_offset_t
129vm_bounce_page_find(count)
130	int count;
131{
132	int bit;
133	int s,i;
134
135	if (count != 1)
136		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");
137
138	s = splbio();
139retry:
140	for (i = 0; i < bounceallocarraysize; i++) {
141		if (bounceallocarray[i] != 0xffffffff) {
142			bit = ffs(~bounceallocarray[i]);
143			if (bit) {
144				bounceallocarray[i] |= 1 << (bit - 1) ;
145				bouncefree -= count;
146				splx(s);
147				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
148			}
149		}
150	}
151	bpwait = 1;
152	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
153	goto retry;
154}
155
156static void
157vm_bounce_kva_free(addr, size, now)
158	vm_offset_t addr;
159	vm_offset_t size;
160	int now;
161{
162	int s = splbio();
163	kvaf[kvasfreecnt].addr = addr;
164	kvaf[kvasfreecnt].size = size;
165	++kvasfreecnt;
166	if( now) {
167		/*
168		 * this will do wakeups
169		 */
170		vm_bounce_kva(0,0);
171	} else {
172		if (bmwait) {
173		/*
174		 * if anyone is waiting on the bounce-map, then wakeup
175		 */
176			wakeup((caddr_t) io_map);
177			bmwait = 0;
178		}
179	}
180	splx(s);
181}
182
183/*
184 * free count bounce buffer pages
185 */
186static void
187vm_bounce_page_free(pa, count)
188	vm_offset_t pa;
189	int count;
190{
191	int allocindex;
192	int index;
193	int bit;
194
195	if (count != 1)
196		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");
197
198	for(index=0;index<bouncepages;index++) {
199		if( pa == bouncepa[index])
200			break;
201	}
202
203	if( index == bouncepages)
204		panic("vm_bounce_page_free: invalid bounce buffer");
205
206	allocindex = index / BITS_IN_UNSIGNED;
207	bit = index % BITS_IN_UNSIGNED;
208
209	bounceallocarray[allocindex] &= ~(1 << bit);
210
211	bouncefree += count;
212	if (bpwait) {
213		bpwait = 0;
214		wakeup((caddr_t) &bounceallocarray);
215	}
216}
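/*
 * Worked example of the bitmap arithmetic above: with 32-bit unsigned
 * words, bounce page index 37 lives in bounceallocarray[37 / 32] ==
 * bounceallocarray[1], bit 37 % 32 == 5, so freeing that page clears
 * bit 5 of word 1 and allocating it sets the same bit.
 */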
217
218/*
219 * allocate bounce buffer kva space (size is in bytes)
220 */
221static vm_offset_t
222vm_bounce_kva(size, waitok)
223	int size;
224	int waitok;
225{
226	int i;
227	vm_offset_t kva = 0;
228	vm_offset_t off;
229	int s = splbio();
230more:
231	if (!bmfreeing && kvasfreecnt) {
232		bmfreeing = 1;
233		for (i = 0; i < kvasfreecnt; i++) {
234			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
235				pmap_kremove( kvaf[i].addr + off);
236			}
237			kmem_free_wakeup(io_map, kvaf[i].addr,
238				kvaf[i].size);
239		}
240		kvasfreecnt = 0;
241		bmfreeing = 0;
242		if( bmwait) {
243			bmwait = 0;
244			wakeup( (caddr_t) io_map);
245		}
246	}
247
248	if( size == 0) {
249		splx(s);
250		return 0;
251	}
252
253	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
254		if( !waitok) {
255			splx(s);
256			return 0;
257		}
258		bmwait = 1;
259		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
260		goto more;
261	}
262	splx(s);
263	return kva;
264}
265
266/*
267 * like vm_bounce_kva, but really allocates bounce pages (takes a page count as arg)
268 */
269vm_offset_t
270vm_bounce_kva_alloc(count)
271int count;
272{
273	int i;
274	vm_offset_t kva;
275	vm_offset_t pa;
276	if( bouncepages == 0) {
277		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
278		return kva;
279	}
280	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
281	for(i=0;i<count;i++) {
282		pa = vm_bounce_page_find(1);
283		pmap_kenter(kva + i * PAGE_SIZE, pa);
284	}
285	return kva;
286}
287
288/*
289 * like vm_bounce_kva_free, but really frees the underlying bounce pages too
290 */
291void
292vm_bounce_kva_alloc_free(kva, count)
293	vm_offset_t kva;
294	int count;
295{
296	int i;
297	vm_offset_t pa;
298	if( bouncepages == 0) {
299		free((caddr_t) kva, M_TEMP);
300		return;
301	}
302	for(i = 0; i < count; i++) {
303		pa = pmap_kextract(kva + i * PAGE_SIZE);
304		vm_bounce_page_free(pa, 1);
305	}
306	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
307}
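/*
 * Illustrative sketch of how the two routines above pair up: a caller
 * that needs a scratch area guaranteed to sit below the DMA limit grabs
 * it with vm_bounce_kva_alloc() and releases it with
 * vm_bounce_kva_alloc_free().  The dma_scratch_* names below are
 * hypothetical, not part of this file.
 */
#if 0
#define DMA_SCRATCH_PAGES	4

static vm_offset_t dma_scratch_kva;

static void
dma_scratch_attach(void)
{
	/* falls back to plain malloc() internally when bouncepages == 0 */
	dma_scratch_kva = vm_bounce_kva_alloc(DMA_SCRATCH_PAGES);
}

static void
dma_scratch_detach(void)
{
	vm_bounce_kva_alloc_free(dma_scratch_kva, DMA_SCRATCH_PAGES);
	dma_scratch_kva = 0;
}
#endif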
308
309/*
310 * set up the struct buf to use bounce buffers where needed...
311 * called just before the buffer goes to the disk sort
312 */
313void
314vm_bounce_alloc(bp)
315	struct buf *bp;
316{
317	int countvmpg;
318	vm_offset_t vastart, vaend;
319	vm_offset_t vapstart, vapend;
320	vm_offset_t va, kva;
321	vm_offset_t pa;
322	int dobounceflag = 0;
323	int i;
324
325	if (bouncepages == 0)
326		return;
327
328	if (bp->b_flags & B_BOUNCE) {
329		printf("vm_bounce_alloc: called recursively???\n");
330		return;
331	}
332
333	if (bp->b_bufsize < bp->b_bcount) {
334		printf(
335		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
336			bp->b_bufsize, bp->b_bcount);
337		panic("vm_bounce_alloc");
338	}
339
340/*
341 *  This is not really necessary
342 *	if( bp->b_bufsize != bp->b_bcount) {
343 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
344 *	}
345 */
346
347
348	vastart = (vm_offset_t) bp->b_data;
349	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;
350
351	vapstart = trunc_page(vastart);
352	vapend = round_page(vaend);
353	countvmpg = (vapend - vapstart) / PAGE_SIZE;
354
355/*
356 * if any page is above 16MB, then go into bounce-buffer mode
357 */
358	va = vapstart;
359	for (i = 0; i < countvmpg; i++) {
360		pa = pmap_kextract(va);
361		if (pa >= SIXTEENMEG)
362			++dobounceflag;
363		if( pa == 0)
364			panic("vm_bounce_alloc: Unmapped page");
365		va += PAGE_SIZE;
366	}
367	if (dobounceflag == 0)
368		return;
369
370	if (bouncepages < dobounceflag)
371		panic("Not enough bounce buffers!!!");
372
373/*
374 * allocate a replacement kva for b_addr
375 */
376	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
377#if 0
378	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
379		(bp->b_flags & B_READ) ? "read":"write",
380			vapstart, vapend, countvmpg, kva);
381#endif
382	va = vapstart;
383	for (i = 0; i < countvmpg; i++) {
384		pa = pmap_kextract(va);
385		if (pa >= SIXTEENMEG) {
386			/*
387			 * allocate a replacement page
388			 */
389			vm_offset_t bpa = vm_bounce_page_find(1);
390			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
391#if 0
392			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
393#endif
394			/*
395			 * if we are writing, then copy the data into the page
396			 */
397			if ((bp->b_flags & B_READ) == 0) {
398				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
399			}
400		} else {
401			/*
402			 * use original page
403			 */
404			pmap_kenter(kva + (PAGE_SIZE * i), pa);
405		}
406		va += PAGE_SIZE;
407	}
408
409/*
410 * flag the buffer as being bounced
411 */
412	bp->b_flags |= B_BOUNCE;
413/*
414 * save the original buffer kva
415 */
416	bp->b_savekva = bp->b_data;
417/*
418 * put our new kva into the buffer (offset by original offset)
419 */
420	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
421				((vm_offset_t) bp->b_savekva & PAGE_MASK));
422#if 0
423	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
424#endif
425	return;
426}
427
428/*
429 * hook into biodone to free bounce buffer
430 */
431void
432vm_bounce_free(bp)
433	struct buf *bp;
434{
435	int i;
436	vm_offset_t origkva, bouncekva, bouncekvaend;
437
438/*
439 * if this isn't a bounced buffer, then just return
440 */
441	if ((bp->b_flags & B_BOUNCE) == 0)
442		return;
443
444/*
445 *  This check is not necessary
446 *	if (bp->b_bufsize != bp->b_bcount) {
447 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
448 *			bp->b_bufsize, bp->b_bcount);
449 *	}
450 */
451
452	origkva = (vm_offset_t) bp->b_savekva;
453	bouncekva = (vm_offset_t) bp->b_data;
454/*
455	printf("free: %d ", bp->b_bufsize);
456*/
457
458/*
459 * check every page in the kva space for b_addr
460 */
461	for (i = 0; i < bp->b_bufsize; ) {
462		vm_offset_t mybouncepa;
463		vm_offset_t copycount;
464
465		copycount = round_page(bouncekva + 1) - bouncekva;
466		mybouncepa = pmap_kextract(trunc_page(bouncekva));
467
468/*
469 * if this is a bounced pa, then process as one
470 */
471		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
472			vm_offset_t tocopy = copycount;
473			if (i + tocopy > bp->b_bufsize)
474				tocopy = bp->b_bufsize - i;
475/*
476 * if this is a read, then copy from bounce buffer into original buffer
477 */
478			if (bp->b_flags & B_READ)
479				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
480/*
481 * free the bounce allocation
482 */
483
484/*
485			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
486*/
487			vm_bounce_page_free(mybouncepa, 1);
488		}
489
490		origkva += copycount;
491		bouncekva += copycount;
492		i += copycount;
493	}
494
495/*
496	printf("\n");
497*/
498/*
499 * add the old kva into the "to free" list
500 */
501
502	bouncekva= trunc_page((vm_offset_t) bp->b_data);
503	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);
504
505/*
506	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
507*/
508	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
509	bp->b_data = bp->b_savekva;
510	bp->b_savekva = 0;
511	bp->b_flags &= ~B_BOUNCE;
512
513	return;
514}
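/*
 * Illustrative sketch of the intended calling pattern: a driver's
 * strategy routine runs vm_bounce_alloc() before handing the buffer to
 * the hardware, and the completion path runs vm_bounce_free() before
 * biodone() wakes the caller.  The xd_* driver names are hypothetical.
 */
#if 0
void
xd_strategy(struct buf *bp)
{
	vm_bounce_alloc(bp);	/* may substitute pages below SIXTEENMEG */
	xd_start(bp);		/* program the controller with bp->b_data */
}

void
xd_done(struct buf *bp)
{
	vm_bounce_free(bp);	/* copy back (reads) and restore b_data */
	biodone(bp);
}
#endif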
515
516
517/*
518 * init the bounce buffer system
519 */
520void
521vm_bounce_init()
522{
523	int i;
524
525	kvasfreecnt = 0;
526
527	if (bouncepages == 0)
528		return;
529
530	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
531	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);
532
533	if (!bounceallocarray)
534		panic("Cannot allocate bounce resource array");
535
536	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
537	if (!bouncepa)
538		panic("Cannot allocate physical memory array");
539
540	for(i=0;i<bounceallocarraysize;i++) {
541		bounceallocarray[i] = 0xffffffff;
542	}
543
544	for(i=0;i<bouncepages;i++) {
545		vm_offset_t pa;
546		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG)
547			panic("bounce memory out of range");
548		if( pa == 0)
549			panic("bounce memory not resident");
550		bouncepa[i] = pa;
551		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
552	}
553	bouncefree = bouncepages;
554
555}
556#endif /* BOUNCE_BUFFERS */
557
558/*
559 * quick version of vm_fault
560 */
561void
562vm_fault_quick(v, prot)
563	caddr_t v;
564	int prot;
565{
566	if (prot & VM_PROT_WRITE)
567		subyte(v, fubyte(v));
568	else
569		fubyte(v);
570}
571
572/*
573 * Finish a fork operation, with process p2 nearly set up.
574 * Copy and update the pcb, set up the stack so that the child
575 * is ready to run and return to user mode.
576 */
577void
578cpu_fork(p1, p2)
579	register struct proc *p1, *p2;
580{
581	struct pcb *pcb2 = &p2->p_addr->u_pcb;
582
583	/* Ensure that p1's pcb is up to date. */
584	if (npxproc == p1)
585		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
586
587	/* Copy p1's pcb. */
588	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
589
590	/*
591	 * Create a new fresh stack for the new process.
592	 * Copy the trap frame for the return to user mode as if from a
593	 * syscall.  This copies the user mode register values.
594	 */
595	p2->p_md.md_regs = (struct trapframe *)
596			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
597	*p2->p_md.md_regs = *p1->p_md.md_regs;
598
599	/*
600	 * Set registers for trampoline to user mode.  Leave space for the
601	 * return address on stack.  These are the kernel mode register values.
602	 */
603	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
604	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
605	pcb2->pcb_esi = (int)fork_return;
606	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
607	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
608	pcb2->pcb_ebx = (int)p2;
609	pcb2->pcb_eip = (int)fork_trampoline;
610	/*
611	 * pcb2->pcb_ldt:	duplicated below, if necessary.
612	 * pcb2->pcb_ldt_len:	cloned above.
613	 * pcb2->pcb_savefpu:	cloned above.
614	 * pcb2->pcb_flags:	cloned above (always 0 here?).
615	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
616	 */
617
618#ifdef USER_LDT
619        /* Copy the LDT, if necessary. */
620        if (pcb2->pcb_ldt != 0) {
621                union descriptor *new_ldt;
622                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);
623
624                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
625                bcopy(pcb2->pcb_ldt, new_ldt, len);
626                pcb2->pcb_ldt = (caddr_t)new_ldt;
627        }
628#endif
629
630	/*
631	 * Now, cpu_switch() can schedule the new process.
632	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
633	 * containing the return address when exiting cpu_switch.
634	 * This will normally be to fork_trampoline(), which will have
635	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
636	 * will set up a stack to call fork_return(p, frame); to complete
637	 * the return to user-mode.
638	 */
639}
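/*
 * Pseudo-C sketch of what the register setup above arranges; the real
 * fork_trampoline is assembler.  When cpu_switch() first runs the child,
 * it "returns" to pcb_eip, and the trampoline calls the function saved
 * in pcb_esi with the argument saved in pcb_ebx and the trap frame that
 * sits just above pcb_esp.  The parameter names here are placeholders.
 */
#if 0
static void
fork_trampoline_sketch(void (*func)(void *, struct trapframe *),
    void *arg, struct trapframe *frame)
{
	(*func)(arg, frame);	/* normally fork_return(p2, frame) */
	/* the trap frame is then used to return to user mode */
}
#endif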
640
641/*
642 * Intercept the return address from a freshly forked process that has NOT
643 * been scheduled yet.
644 *
645 * This is needed to make kernel threads stay in kernel mode.
646 */
647void
648cpu_set_fork_handler(p, func, arg)
649	struct proc *p;
650	void (*func) __P((void *));
651	void *arg;
652{
653	/*
654	 * Note that the trap frame follows the args, so the function
655	 * is really called like this:  func(arg, frame);
656	 */
657	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
658	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
659}
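/*
 * Illustrative sketch: a kernel thread is made by forking and then
 * pointing the child at a kernel-only entry with cpu_set_fork_handler(),
 * so it never attempts the return to user mode.  kproc_main() and
 * make_kernel_thread() are hypothetical names; 'p' is the freshly
 * forked, not yet scheduled child.
 */
#if 0
static int kproc_chan;

static void
kproc_main(void *arg)
{
	for (;;)
		tsleep(&kproc_chan, PVM, "kproc", hz);	/* periodic work */
}

static void
make_kernel_thread(struct proc *p)
{
	cpu_set_fork_handler(p, kproc_main, NULL);
}
#endif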
660
661void
662cpu_exit(p)
663	register struct proc *p;
664{
665#ifdef USER_LDT
666	struct pcb *pcb;
667#endif
668
669#if NNPX > 0
670	npxexit(p);
671#endif	/* NNPX */
672#ifdef USER_LDT
673	pcb = &p->p_addr->u_pcb;
674	if (pcb->pcb_ldt != 0) {
675		if (pcb == curpcb)
676			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
677		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
678			pcb->pcb_ldt_len * sizeof(union descriptor));
679		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
680	}
681#endif
682	cnt.v_swtch++;
683	cpu_switch(p);
684	panic("cpu_exit");
685}
686
687void
688cpu_wait(p)
689	struct proc *p;
690{
691	/* drop per-process resources */
692	pmap_dispose_proc(p);
693	vmspace_free(p->p_vmspace);
694}
695
696/*
697 * Dump the machine specific header information at the start of a core dump.
698 */
699int
700cpu_coredump(p, vp, cred)
701	struct proc *p;
702	struct vnode *vp;
703	struct ucred *cred;
704{
705
706	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
707	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
708	    p));
709}
710
711#ifdef notyet
712static void
713setredzone(pte, vaddr)
714	u_short *pte;
715	caddr_t vaddr;
716{
717/* eventually do this by setting up an expand-down stack segment
718   for ss0: selector, allowing stack access down to top of u.
719   this means though that protection violations need to be handled
720   thru a double fault exception that must do an integral task
721   switch to a known good context, within which a dump can be
722   taken. a sensible scheme might be to save the initial context
723   used by sched (that has physical memory mapped 1:1 at bottom)
724   and take the dump while still in mapped mode */
725}
726#endif
727
728/*
729 * Convert kernel VA to physical address
730 */
731u_long
732kvtop(void *addr)
733{
734	vm_offset_t va;
735
736	va = pmap_kextract((vm_offset_t)addr);
737	if (va == 0)
738		panic("kvtop: zero page frame");
739	return((int)va);
740}
741
742/*
743 * Map an IO request into kernel virtual address space.
744 *
745 * All requests are (re)mapped into kernel VA space.
746 * Notice that we use b_bufsize for the size of the buffer
747 * to be mapped.  b_bcount might be modified by the driver.
748 */
749void
750vmapbuf(bp)
751	register struct buf *bp;
752{
753	register caddr_t addr, v, kva;
754	vm_offset_t pa;
755
756	if ((bp->b_flags & B_PHYS) == 0)
757		panic("vmapbuf");
758
759	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
760	    addr < bp->b_data + bp->b_bufsize;
761	    addr += PAGE_SIZE, v += PAGE_SIZE) {
762		/*
763		 * Do the vm_fault if needed; do the copy-on-write thing
764		 * when reading stuff off device into memory.
765		 */
766		vm_fault_quick(addr,
767			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
768		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
769		if (pa == 0)
770			panic("vmapbuf: page not present");
771		vm_page_hold(PHYS_TO_VM_PAGE(pa));
772		pmap_kenter((vm_offset_t) v, pa);
773	}
774
775	kva = bp->b_saveaddr;
776	bp->b_saveaddr = bp->b_data;
777	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
778}
779
780/*
781 * Free the io map PTEs associated with this IO operation.
782 * We also invalidate the TLB entries and restore the original b_addr.
783 */
784void
785vunmapbuf(bp)
786	register struct buf *bp;
787{
788	register caddr_t addr;
789	vm_offset_t pa;
790
791	if ((bp->b_flags & B_PHYS) == 0)
792		panic("vunmapbuf");
793
794	for (addr = (caddr_t)trunc_page(bp->b_data);
795	    addr < bp->b_data + bp->b_bufsize;
796	    addr += PAGE_SIZE) {
797		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
798		pmap_kremove((vm_offset_t) addr);
799		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
800	}
801
802	bp->b_data = bp->b_saveaddr;
803}
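/*
 * Illustrative sketch of the vmapbuf()/vunmapbuf() pair as used for raw
 * ("physical") I/O: mark the buffer B_PHYS, map the user pages into
 * kernel VA around the transfer, and unmap when it completes.  The
 * helper name raw_rw() is hypothetical and error handling is elided.
 */
#if 0
static void
raw_rw(struct buf *bp, void (*strat)(struct buf *))
{
	bp->b_flags |= B_PHYS;
	vmapbuf(bp);		/* wire user pages, point b_data at kernel VA */
	(*strat)(bp);		/* the device only ever sees kernel addresses */
	biowait(bp);		/* wait for the transfer to finish */
	vunmapbuf(bp);		/* unmap, unwire, restore the original b_data */
	bp->b_flags &= ~B_PHYS;
}
#endif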
804
805/*
806 * Force reset the processor by invalidating the entire address space!
807 */
808void
809cpu_reset() {
810#ifdef PC98
811	/*
812	 * Attempt to do a CPU reset via CPU reset port.
813	 */
814	asm("cli");
815	outb(0x37, 0x0f);		/* SHUT0 = 0. */
816	outb(0x37, 0x0b);		/* SHUT1 = 0. */
817	outb(0xf0, 0x00);		/* Reset. */
818#else
819	/*
820	 * Attempt to do a CPU reset via the keyboard controller, but
821	 * do not turn off GateA20, as any machine that fails
822	 * to do the reset here would then end up in no man's land.
823	 */
824
825#if !defined(BROKEN_KEYBOARD_RESET)
826	outb(IO_KBD + 4, 0xFE);
827	DELAY(500000);	/* wait 0.5 sec to see if that did it */
828	printf("Keyboard reset did not work, attempting CPU shutdown\n");
829	DELAY(1000000);	/* wait 1 sec for printf to complete */
830#endif
831#endif /* PC98 */
832	/* force a shutdown by unmapping entire address space ! */
833	bzero((caddr_t) PTD, PAGE_SIZE);
834
835	/* "good night, sweet prince .... <THUNK!>" */
836	invltlb();
837	/* NOTREACHED */
838	while(1);
839}
840
841/*
842 * Grow the user stack to allow for 'sp'. This version grows the stack in
843 *	chunks of SGROWSIZ.
844 */
845int
846grow(p, sp)
847	struct proc *p;
848	u_int sp;
849{
850	unsigned int nss;
851	caddr_t v;
852	struct vmspace *vm = p->p_vmspace;
853
854	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
855	    return (1);
856
857	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);
858
859	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
860		return (0);
861
862	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
863	    SGROWSIZ) < nss) {
864		int grow_amount;
865		/*
866		 * If necessary, grow the VM that the stack occupies
867		 * to allow for the rlimit. This allows us to not have
868		 * to allocate all of the VM up-front in execve (which
869		 * is expensive).
870		 * Grow the VM by the amount requested rounded up to
871		 * the nearest SGROWSIZ to provide for some hysteresis.
872		 */
873		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
874		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
875		    SGROWSIZ) - grow_amount;
876		/*
877		 * If there isn't enough room to extend by SGROWSIZ, then
878		 * just extend to the maximum size
879		 */
880		if (v < vm->vm_maxsaddr) {
881			v = vm->vm_maxsaddr;
882			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
883		}
884		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
885		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
886			return (0);
887		}
888		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
889	}
890
891	return (1);
892}
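/*
 * Illustrative sketch: grow() is meant to be called from the page fault
 * path when the faulting address lies in the user stack region, before
 * vm_fault() is attempted.  Only the grow() call itself comes from this
 * file; user_stack_fault() is a hypothetical wrapper.
 */
#if 0
static int
user_stack_fault(struct proc *p, vm_offset_t va)
{
	if (!grow(p, (u_int)va))
		return (0);	/* beyond the rlimit: deliver SIGSEGV */
	return (1);		/* stack now covers va (or already did) */
}
#endif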
893
894/*
895 * Implement the pre-zeroed page mechanism.
896 * This routine is called from the idle loop.
897 */
898int
899vm_page_zero_idle()
900{
901	static int free_rover;
902	vm_page_t m;
903	int s;
904
905#ifdef WRONG
906	if (cnt.v_free_count <= cnt.v_interrupt_free_min)
907		return (0);
908#endif
909	/*
910	 * XXX
911	 * We stop zeroing pages when there are sufficient prezeroed pages.
912	 * This threshold isn't really needed, except we want to
913	 * bypass unneeded calls to vm_page_list_find, and the
914	 * associated cache flush and latency.  The pre-zero will
915	 * still be called when there are significantly more
916	 * non-prezeroed pages than zeroed pages.  The threshold
917	 * of half the number of reserved pages is arbitrary, but
918	 * approximately the right amount.  Eventually, we should
919	 * perhaps interrupt the zero operation when a process
920	 * is found to be ready to run.
921	 */
922	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
923		return (0);
924	s = splvm();
925	enable_intr();
926	m = vm_page_list_find(PQ_FREE, free_rover);
927	if (m != NULL) {
928		--(*vm_page_queues[m->queue].lcnt);
929		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
930		splx(s);
931		pmap_zero_page(VM_PAGE_TO_PHYS(m));
932		(void)splvm();
933		m->queue = PQ_ZERO + m->pc;
934		++(*vm_page_queues[m->queue].lcnt);
935		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
936		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
937		++vm_page_zero_count;
938	}
939	splx(s);
940	disable_intr();
941	return (1);
942}
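/*
 * Illustrative sketch: the expected caller is the idle loop, which keeps
 * pre-zeroing pages while nothing is runnable and vm_page_zero_idle()
 * keeps reporting progress.  The C framing below is an assumption; the
 * real idle loop lives in the assembler context switch code.
 */
#if 0
static void
idle_loop_sketch(void)
{
	for (;;) {
		/* the real loop also re-checks the run queues each pass */
		while (vm_page_zero_idle())
			;
		/* ... halt or spin until a process becomes runnable ... */
	}
}
#endif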
943