vm_machdep.c revision 27993
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.84 1997/07/20 08:37:24 bde Exp $
 */

#include "npx.h"
#include "opt_bounce.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/pcb_ext.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

#ifdef PC98
#include <pc98/pc98/pc98.h>
#else
#include <i386/isa/isa.h>
#endif

#ifdef BOUNCE_BUFFERS
static vm_offset_t
		vm_bounce_kva __P((int size, int waitok));
static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
					int now));
static vm_offset_t
		vm_bounce_page_find __P((int count));
static void	vm_bounce_page_free __P((vm_offset_t pa, int count));

static volatile int	kvasfreecnt;

caddr_t		bouncememory;
int		bouncepages;
static int	bpwait;
static vm_offset_t	*bouncepa;
static int		bmwait, bmfreeing;

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
static int		bounceallocarraysize;
static unsigned	*bounceallocarray;
static int		bouncefree;
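/*
 * Note on the bookkeeping above: bounce page i is tracked by bit
 * (i % BITS_IN_UNSIGNED) of bounceallocarray[i / BITS_IN_UNSIGNED], with a
 * set bit meaning "allocated" and its physical address kept in bouncepa[i].
 * For example, with 40 bounce pages the bitmap occupies two unsigned words
 * and page 35 corresponds to bit 3 of word 1.
 */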

#if defined(PC98) && defined (EPSON_BOUNCEDMA)
#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
#else
#define SIXTEENMEG (4096*4096)
#endif
#define MAXBKVA 1024
int		maxbkva = MAXBKVA*PAGE_SIZE;

/* special list that can be used at interrupt time for eventual kva free */
static struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
static vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s,i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			bit = ffs(~bounceallocarray[i]);
			if (bit) {
				bounceallocarray[i] |= 1 << (bit - 1) ;
				bouncefree -= count;
				splx(s);
				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
			}
		}
	}
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}
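
/*
 * For example, if bounceallocarray[0] is 0x0000000f, ffs(~0x0000000f)
 * returns 5, so bit 4 is claimed and bouncepa[4] is handed back to the
 * caller.  When no bit is free the caller sleeps until
 * vm_bounce_page_free() does the wakeup.
 */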

static void
vm_bounce_kva_free(addr, size, now)
	vm_offset_t addr;
	vm_offset_t size;
	int now;
{
	int s = splbio();
	kvaf[kvasfreecnt].addr = addr;
	kvaf[kvasfreecnt].size = size;
	++kvasfreecnt;
	if( now) {
		/*
		 * this will do wakeups
		 */
		vm_bounce_kva(0,0);
	} else {
		if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
			wakeup((caddr_t) io_map);
			bmwait = 0;
		}
	}
	splx(s);
}

/*
 * free count bounce buffer pages
 */
static void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");

	for(index=0;index<bouncepages;index++) {
		if( pa == bouncepa[index])
			break;
	}

	if( index == bouncepages)
		panic("vm_bounce_page_free: invalid bounce buffer");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}

/*
 * allocate bounce buffer kva of the given size (in bytes); a size of 0
 * just drains the deferred kva free list
 */
static vm_offset_t
vm_bounce_kva(size, waitok)
	int size;
	int waitok;
{
	int i;
	vm_offset_t kva = 0;
	vm_offset_t off;
	int s = splbio();
more:
	if (!bmfreeing && kvasfreecnt) {
		bmfreeing = 1;
		for (i = 0; i < kvasfreecnt; i++) {
			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
				pmap_kremove( kvaf[i].addr + off);
			}
			kmem_free_wakeup(io_map, kvaf[i].addr,
				kvaf[i].size);
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
		if( bmwait) {
			bmwait = 0;
			wakeup( (caddr_t) io_map);
		}
	}

	if( size == 0) {
		splx(s);
		return 0;
	}

	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
		if( !waitok) {
			splx(s);
			return 0;
		}
		bmwait = 1;
		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);
	return kva;
}
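
/*
 * Note that vm_bounce_kva_free(..., now != 0) relies on the size == 0
 * path above: calling vm_bounce_kva(0, 0) unmaps and returns everything
 * queued in kvaf[] to io_map (kmem_free_wakeup also wakes any waiters)
 * without allocating anything new.
 */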

/*
 * same as vm_bounce_kva -- but really allocates backing pages
 * (takes a page count as its argument)
 */
vm_offset_t
vm_bounce_kva_alloc(count)
int count;
{
	int i;
	vm_offset_t kva;
	vm_offset_t pa;
	if( bouncepages == 0) {
		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
		return kva;
	}
	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
	for(i=0;i<count;i++) {
		pa = vm_bounce_page_find(1);
		pmap_kenter(kva + i * PAGE_SIZE, pa);
	}
	return kva;
}
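
/*
 * Illustrative use only: a driver that wants a small scratch buffer backed
 * by bounce pages (i.e. below SIXTEENMEG) might do
 *
 *	vm_offset_t buf = vm_bounce_kva_alloc(1);
 *	...
 *	vm_bounce_kva_alloc_free(buf, 1);
 *
 * When no bounce pages are configured these calls degenerate to a plain
 * malloc()/free() of M_TEMP memory, as seen above and below.
 */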

/*
 * same as vm_bounce_kva_free -- but also frees the underlying bounce pages
 */
void
vm_bounce_kva_alloc_free(kva, count)
	vm_offset_t kva;
	int count;
{
	int i;
	vm_offset_t pa;
	if( bouncepages == 0) {
		free((caddr_t) kva, M_TEMP);
		return;
	}
	for(i = 0; i < count; i++) {
		pa = pmap_kextract(kva + i * PAGE_SIZE);
		vm_bounce_page_free(pa, 1);
	}
	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int i;

	if (bouncepages == 0)
		return;

	if (bp->b_flags & B_BOUNCE) {
		printf("vm_bounce_alloc: called recursively???\n");
		return;
	}

	if (bp->b_bufsize < bp->b_bcount) {
		printf(
		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
			bp->b_bufsize, bp->b_bcount);
		panic("vm_bounce_alloc");
	}

/*
 *  This is not really necessary
 *	if( bp->b_bufsize != bp->b_bcount) {
 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
 *	}
 */


	vastart = (vm_offset_t) bp->b_data;
	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;

	vapstart = trunc_page(vastart);
	vapend = round_page(vaend);
	countvmpg = (vapend - vapstart) / PAGE_SIZE;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		if( pa == 0)
			panic("vm_bounce_alloc: Unmapped page");
		va += PAGE_SIZE;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
#if 0
	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
		(bp->b_flags & B_READ) ? "read":"write",
			vapstart, vapend, countvmpg, kva);
#endif
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
#if 0
			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
#endif
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0) {
				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
			}
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (PAGE_SIZE * i), pa);
		}
		va += PAGE_SIZE;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_data;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & PAGE_MASK));
#if 0
	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
#endif
	return;
}
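
/*
 * To summarize the bounce path: vm_bounce_alloc() runs before the request
 * is queued and rewrites b_data to a replacement kva (mixing bounce pages
 * for anything above SIXTEENMEG with the original pages below it), while
 * the original kva is parked in b_savekva.  vm_bounce_free(), called from
 * biodone(), copies read data back, releases the bounce pages and kva,
 * and restores b_data.
 */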

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva, bouncekvaend;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

/*
 *  This check is not necessary
 *	if (bp->b_bufsize != bp->b_bcount) {
 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
 *			bp->b_bufsize, bp->b_bcount);
 *	}
 */

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_data;
/*
	printf("free: %d ", bp->b_bufsize);
*/

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < bp->b_bufsize; ) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
			vm_offset_t tocopy = copycount;
			if (i + tocopy > bp->b_bufsize)
				tocopy = bp->b_bufsize - i;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
/*
 * free the bounce allocation
 */

/*
			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
*/
			vm_bounce_page_free(mybouncepa, 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		i += copycount;
	}

/*
	printf("\n");
*/
/*
 * add the old kva into the "to free" list
 */

	bouncekva= trunc_page((vm_offset_t) bp->b_data);
	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);

/*
	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
*/
	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
	bp->b_data = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}


/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	int i;

	kvasfreecnt = 0;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array");

	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
	if (!bouncepa)
		panic("Cannot allocate physical memory array");

	for(i=0;i<bounceallocarraysize;i++) {
		bounceallocarray[i] = 0xffffffff;
	}

	for(i=0;i<bouncepages;i++) {
		vm_offset_t pa;
		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG)
			panic("bounce memory out of range");
		if( pa == 0)
			panic("bounce memory not resident");
		bouncepa[i] = pa;
		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
	}
	bouncefree = bouncepages;

}
#endif /* BOUNCE_BUFFERS */

/*
 * quick version of vm_fault
 */
void
vm_fault_quick(v, prot)
	caddr_t v;
	int prot;
{
	if (prot & VM_PROT_WRITE)
		subyte(v, fubyte(v));
	else
		fubyte(v);
}
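
/*
 * The fubyte()/subyte() pair above simply touches one byte of the user
 * address: the read faults the page in, and for the write case the byte
 * is rewritten with its own value, so copy-on-write gets resolved
 * without changing the data.
 */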

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	struct pcb *pcb2 = &p2->p_addr->u_pcb;

	/* Ensure that p1's pcb is up to date. */
	if (npxproc == p1)
		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);

	/* Copy p1's pcb. */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 */
	p2->p_md.md_regs = (struct trapframe *)
			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
	*p2->p_md.md_regs = *p1->p_md.md_regs;

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 */
	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
	pcb2->pcb_esi = (int)fork_return;
	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
	pcb2->pcb_ebx = (int)p2;
	pcb2->pcb_eip = (int)fork_trampoline;
	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_ldt_len:	cloned above.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above (always 0 here?).
	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
	 */

#ifdef VM86
	/*
	 * XXX don't copy the i/o pages.  this should probably be fixed.
	 */
	pcb2->pcb_ext = 0;
#endif

#ifdef USER_LDT
        /* Copy the LDT, if necessary. */
        if (pcb2->pcb_ldt != 0) {
                union descriptor *new_ldt;
                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);

                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
                bcopy(pcb2->pcb_ldt, new_ldt, len);
                pcb2->pcb_ldt = (caddr_t)new_ldt;
        }
#endif

	/*
	 * Now, cpu_switch() can schedule the new process.
	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to proc_trampoline(), which will have
	 * %ebx loaded with the new proc's pointer.  proc_trampoline()
	 * will set up a stack to call fork_return(p, frame); to complete
	 * the return to user-mode.
	 */
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(p, func, arg)
	struct proc *p;
	void (*func) __P((void *));
	void *arg;
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
}
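
/*
 * Illustrative use (hypothetical caller, not from this file): after a
 * child process has been created but before it is first scheduled,
 * something like
 *
 *	cpu_set_fork_handler(p2, mythread_main, mythread_arg);
 *
 * makes the child start life in mythread_main(mythread_arg, frame)
 * instead of taking the normal fork_return() path to user mode.
 */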

void
cpu_exit(p)
	register struct proc *p;
{
#if defined(USER_LDT) || defined(VM86)
	struct pcb *pcb = &p->p_addr->u_pcb;
#endif

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
#ifdef VM86
	if (pcb->pcb_ext != 0) {
	        /*
		 * XXX do we need to move the TSS off the allocated pages
		 * before freeing them?  (not done here)
		 */
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
		    ctob(IOPAGES + 1));
		pcb->pcb_ext = 0;
	}
#endif
#ifdef USER_LDT
	if (pcb->pcb_ldt != 0) {
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
	}
#endif
	cnt.v_swtch++;
	cpu_switch(p);
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
	/* drop per-process resources */
	pmap_dispose_proc(p);
	vmspace_free(p->p_vmspace);
}

/*
 * Dump the machine specific header information at the start of a core dump.
 */
int
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}

#ifdef notyet
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}
#endif

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_kextract((vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((int)va);
}

/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr, v, kva;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE, v += PAGE_SIZE) {
		/*
		 * Do the vm_fault if needed; do the copy-on-write thing
		 * when reading stuff off device into memory.
		 */
		vm_fault_quick(addr,
			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		if (pa == 0)
			panic("vmapbuf: page not present");
		vm_page_hold(PHYS_TO_VM_PAGE(pa));
		pmap_kenter((vm_offset_t) v, pa);
	}

	kva = bp->b_saveaddr;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}
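
/*
 * vmapbuf() and vunmapbuf() below are used in pairs around raw (B_PHYS)
 * transfers, e.g. by physio(): the user pages are wired via vm_page_hold()
 * and aliased into the kernel map here, then unmapped and unheld once the
 * I/O completes.
 */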

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	for (addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE) {
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		pmap_kremove((vm_offset_t) addr);
		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
	}

	bp->b_data = bp->b_saveaddr;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset() {
#ifdef PC98
	/*
	 * Attempt to do a CPU reset via CPU reset port.
	 */
	asm("cli");
	outb(0x37, 0x0f);		/* SHUT0 = 0. */
	outb(0x37, 0x0b);		/* SHUT1 = 0. */
	outb(0xf0, 0x00);		/* Reset. */
#else
	/*
	 * Attempt to do a CPU reset via the keyboard controller;
	 * do not turn off the GateA20, as any machine that fails
	 * to do the reset here would then end up in no man's land.
	 */

#if !defined(BROKEN_KEYBOARD_RESET)
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	printf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for printf to complete */
#endif
#endif /* PC98 */
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, PAGE_SIZE);

	/* "good night, sweet prince .... <THUNK!>" */
	invltlb();
	/* NOTREACHED */
	while(1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	u_int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}
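
/*
 * Worked example (assuming 4K pages and an SGROWSIZ of 128K): with
 * vm_ssize at 32 pages (128K mapped) and a fault needing nss = 200K,
 * roundup(128K, 128K) = 128K < 200K, so grow_amount becomes
 * roundup(200K - 128K, 128K) = 128K and the map is extended by a 128K
 * chunk starting 256K below USRSTACK; vm_ssize then becomes 64 pages.
 */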

/*
 * Implement the pre-zeroed page mechanism.
 * This routine is called from the idle loop.
 */
int
vm_page_zero_idle()
{
	static int free_rover;
	vm_page_t m;
	int s;

#ifdef WRONG
	if (cnt.v_free_count <= cnt.v_interrupt_free_min)
		return (0);
#endif
	/*
	 * XXX
	 * We stop zeroing pages when there are sufficient prezeroed pages.
	 * This threshold isn't really needed, except we want to
	 * bypass unneeded calls to vm_page_list_find, and the
	 * associated cache flush and latency.  The pre-zero will
	 * still be called when there are significantly more
	 * non-prezeroed pages than zeroed pages.  The threshold
	 * of half the number of reserved pages is arbitrary, but
	 * approximately the right amount.  Eventually, we should
	 * perhaps interrupt the zero operation when a process
	 * is found to be ready to run.
	 */
	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
		return (0);
	s = splvm();
	enable_intr();
	m = vm_page_list_find(PQ_FREE, free_rover);
	if (m != NULL) {
		--(*vm_page_queues[m->queue].lcnt);
		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
		splx(s);
		pmap_zero_page(VM_PAGE_TO_PHYS(m));
		(void)splvm();
		m->queue = PQ_ZERO + m->pc;
		++(*vm_page_queues[m->queue].lcnt);
		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
		++vm_page_zero_count;
	}
	splx(s);
	disable_intr();
	return (1);
}
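
/*
 * Concretely (with made-up numbers): if v_free_count is 1000 and
 * v_free_reserved is 100, the routine keeps zeroing until
 * vm_page_zero_count reaches 950, i.e. until all but half the reserved
 * pages on the free queues are prezeroed.
 */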