vm_machdep.c revision 29330
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
40 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41 *	$Id: vm_machdep.c,v 1.88 1997/09/10 12:31:28 joerg Exp $
42 */
43
44#include "npx.h"
45#include "opt_bounce.h"
46
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/proc.h>
50#include <sys/malloc.h>
51#include <sys/buf.h>
52#include <sys/vnode.h>
53#include <sys/vmmeter.h>
54
55#include <machine/clock.h>
56#include <machine/cpu.h>
57#include <machine/md_var.h>
58
59#include <vm/vm.h>
60#include <vm/vm_param.h>
61#include <vm/vm_prot.h>
62#include <sys/lock.h>
63#include <vm/vm_kern.h>
64#include <vm/vm_page.h>
65#include <vm/vm_map.h>
66#include <vm/vm_extern.h>
67
68#include <sys/user.h>
69
70#ifdef PC98
71#include <pc98/pc98/pc98.h>
72#else
73#include <i386/isa/isa.h>
74#endif
75
76#ifdef BOUNCE_BUFFERS
77static vm_offset_t
78		vm_bounce_kva __P((int size, int waitok));
79static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
80					int now));
81static vm_offset_t
82		vm_bounce_page_find __P((int count));
83static void	vm_bounce_page_free __P((vm_offset_t pa, int count));
84
85static volatile int	kvasfreecnt;
86
87caddr_t		bouncememory;
88int		bouncepages;
89static int	bpwait;
90static vm_offset_t	*bouncepa;
91static int		bmwait, bmfreeing;
92
93#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
94static int		bounceallocarraysize;
95static unsigned	*bounceallocarray;
96static int		bouncefree;
97
98#if defined(PC98) && defined (EPSON_BOUNCEDMA)
99#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
100#else
101#define SIXTEENMEG (4096*4096)
102#endif
103#define MAXBKVA 1024
104int		maxbkva = MAXBKVA*PAGE_SIZE;
105
106/* special list that can be used at interrupt time for eventual kva free */
107static struct kvasfree {
108	vm_offset_t addr;
109	vm_offset_t size;
110} kvaf[MAXBKVA];
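
/*
 * Bookkeeping for the bounce pages (as implemented below): bounceallocarray
 * is a bitmap with one bit per bounce page (bit set = page in use),
 * bouncepa[] holds the physical address of each bounce page, and kvaf[]
 * is the deferred-free list of kva ranges that vm_bounce_kva() drains,
 * since the actual free may be requested at interrupt time.
 */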
111
112/*
113 * get bounce buffer pages (count physically contiguous)
114 * (only 1 implemented now)
115 */
116static vm_offset_t
117vm_bounce_page_find(count)
118	int count;
119{
120	int bit;
121	int s,i;
122
123	if (count != 1)
124		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");
125
126	s = splbio();
127retry:
128	for (i = 0; i < bounceallocarraysize; i++) {
129		if (bounceallocarray[i] != 0xffffffff) {
130			bit = ffs(~bounceallocarray[i]);
131			if (bit) {
132				bounceallocarray[i] |= 1 << (bit - 1) ;
133				bouncefree -= count;
134				splx(s);
135				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
136			}
137		}
138	}
139	bpwait = 1;
140	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
141	goto retry;
142}
143
144static void
145vm_bounce_kva_free(addr, size, now)
146	vm_offset_t addr;
147	vm_offset_t size;
148	int now;
149{
150	int s = splbio();
151	kvaf[kvasfreecnt].addr = addr;
152	kvaf[kvasfreecnt].size = size;
153	++kvasfreecnt;
154	if( now) {
155		/*
156		 * this will do wakeups
157		 */
158		vm_bounce_kva(0,0);
159	} else {
160		if (bmwait) {
161		/*
162		 * if anyone is waiting on the bounce-map, then wakeup
163		 */
164			wakeup((caddr_t) io_map);
165			bmwait = 0;
166		}
167	}
168	splx(s);
169}
170
171/*
172 * free count bounce buffer pages
173 */
174static void
175vm_bounce_page_free(pa, count)
176	vm_offset_t pa;
177	int count;
178{
179	int allocindex;
180	int index;
181	int bit;
182
183	if (count != 1)
184		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");
185
186	for(index=0;index<bouncepages;index++) {
187		if( pa == bouncepa[index])
188			break;
189	}
190
191	if( index == bouncepages)
192		panic("vm_bounce_page_free: invalid bounce buffer");
193
194	allocindex = index / BITS_IN_UNSIGNED;
195	bit = index % BITS_IN_UNSIGNED;
196
197	bounceallocarray[allocindex] &= ~(1 << bit);
198
199	bouncefree += count;
200	if (bpwait) {
201		bpwait = 0;
202		wakeup((caddr_t) &bounceallocarray);
203	}
204}
205
206/*
207 * allocate 'size' bytes of bounce buffer kva
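 * (Calling this with size 0 just drains the deferred kva free list and does
 * the wakeups; vm_bounce_kva_free() relies on that when its 'now' argument
 * is set.)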
208 */
209static vm_offset_t
210vm_bounce_kva(size, waitok)
211	int size;
212	int waitok;
213{
214	int i;
215	vm_offset_t kva = 0;
216	vm_offset_t off;
217	int s = splbio();
218more:
219	if (!bmfreeing && kvasfreecnt) {
220		bmfreeing = 1;
221		for (i = 0; i < kvasfreecnt; i++) {
222			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
223				pmap_kremove( kvaf[i].addr + off);
224			}
225			kmem_free_wakeup(io_map, kvaf[i].addr,
226				kvaf[i].size);
227		}
228		kvasfreecnt = 0;
229		bmfreeing = 0;
230		if( bmwait) {
231			bmwait = 0;
232			wakeup( (caddr_t) io_map);
233		}
234	}
235
236	if( size == 0) {
237		splx(s);
238		return 0;
239	}
240
241	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
242		if( !waitok) {
243			splx(s);
244			return 0;
245		}
246		bmwait = 1;
247		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
248		goto more;
249	}
250	splx(s);
251	return kva;
252}
253
254/*
255 * same as vm_bounce_kva -- but really allocates backing pages (and takes a page count as arg)
256 */
257vm_offset_t
258vm_bounce_kva_alloc(count)
259int count;
260{
261	int i;
262	vm_offset_t kva;
263	vm_offset_t pa;
264	if( bouncepages == 0) {
265		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
266		return kva;
267	}
268	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
269	for(i=0;i<count;i++) {
270		pa = vm_bounce_page_find(1);
271		pmap_kenter(kva + i * PAGE_SIZE, pa);
272	}
273	return kva;
274}
275
276/*
277 * same as vm_bounce_kva_free -- but really frees the backing pages too
278 */
279void
280vm_bounce_kva_alloc_free(kva, count)
281	vm_offset_t kva;
282	int count;
283{
284	int i;
285	vm_offset_t pa;
286	if( bouncepages == 0) {
287		free((caddr_t) kva, M_TEMP);
288		return;
289	}
290	for(i = 0; i < count; i++) {
291		pa = pmap_kextract(kva + i * PAGE_SIZE);
292		vm_bounce_page_free(pa, 1);
293	}
294	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
295}
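
/*
 * Usage sketch (hypothetical caller, not taken from this file): a driver
 * needing a DMA-safe scratch page below the SIXTEENMEG boundary might do
 *
 *	vm_offset_t kva = vm_bounce_kva_alloc(1);
 *	... DMA into/out of the page at kva ...
 *	vm_bounce_kva_alloc_free(kva, 1);
 *
 * When bouncepages is 0 this pair degrades to plain malloc()/free().
 */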
296
297/*
298 * do the things necessary to the struct buf to implement
299 * bounce buffers...  inserted before the disk sort
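 * For a write, the data is copied into the replacement (bounce) pages here;
 * for a read, vm_bounce_free() copies it back into the original buffer at
 * biodone() time.  Only pages at or above SIXTEENMEG are replaced, the rest
 * keep their original mapping.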
300 */
301void
302vm_bounce_alloc(bp)
303	struct buf *bp;
304{
305	int countvmpg;
306	vm_offset_t vastart, vaend;
307	vm_offset_t vapstart, vapend;
308	vm_offset_t va, kva;
309	vm_offset_t pa;
310	int dobounceflag = 0;
311	int i;
312
313	if (bouncepages == 0)
314		return;
315
316	if (bp->b_flags & B_BOUNCE) {
317		printf("vm_bounce_alloc: called recursively???\n");
318		return;
319	}
320
321	if (bp->b_bufsize < bp->b_bcount) {
322		printf(
323		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
324			bp->b_bufsize, bp->b_bcount);
325		panic("vm_bounce_alloc");
326	}
327
328/*
329 *  This is not really necessary
330 *	if( bp->b_bufsize != bp->b_bcount) {
331 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
332 *	}
333 */
334
335
336	vastart = (vm_offset_t) bp->b_data;
337	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;
338
339	vapstart = trunc_page(vastart);
340	vapend = round_page(vaend);
341	countvmpg = (vapend - vapstart) / PAGE_SIZE;
342
343/*
344 * if any page is above 16MB, then go into bounce-buffer mode
345 */
346	va = vapstart;
347	for (i = 0; i < countvmpg; i++) {
348		pa = pmap_kextract(va);
349		if (pa >= SIXTEENMEG)
350			++dobounceflag;
351		if( pa == 0)
352			panic("vm_bounce_alloc: Unmapped page");
353		va += PAGE_SIZE;
354	}
355	if (dobounceflag == 0)
356		return;
357
358	if (bouncepages < dobounceflag)
359		panic("Not enough bounce buffers!!!");
360
361/*
362 * allocate a replacement kva for b_addr
363 */
364	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
365#if 0
366	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
367		(bp->b_flags & B_READ) ? "read":"write",
368			vapstart, vapend, countvmpg, kva);
369#endif
370	va = vapstart;
371	for (i = 0; i < countvmpg; i++) {
372		pa = pmap_kextract(va);
373		if (pa >= SIXTEENMEG) {
374			/*
375			 * allocate a replacement page
376			 */
377			vm_offset_t bpa = vm_bounce_page_find(1);
378			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
379#if 0
380			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
381#endif
382			/*
383			 * if we are writing, then copy the data into the page
384			 */
385			if ((bp->b_flags & B_READ) == 0) {
386				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
387			}
388		} else {
389			/*
390			 * use original page
391			 */
392			pmap_kenter(kva + (PAGE_SIZE * i), pa);
393		}
394		va += PAGE_SIZE;
395	}
396
397/*
398 * flag the buffer as being bounced
399 */
400	bp->b_flags |= B_BOUNCE;
401/*
402 * save the original buffer kva
403 */
404	bp->b_savekva = bp->b_data;
405/*
406 * put our new kva into the buffer (offset by original offset)
407 */
408	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
409				((vm_offset_t) bp->b_savekva & PAGE_MASK));
410#if 0
411	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
412#endif
413	return;
414}
415
416/*
417 * hook into biodone to free bounce buffer
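 * For a read, the data is first copied from the bounce pages back into the
 * caller's original buffer; the bounce pages and the bounce kva are then
 * released and b_data is restored from b_savekva.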
418 */
419void
420vm_bounce_free(bp)
421	struct buf *bp;
422{
423	int i;
424	vm_offset_t origkva, bouncekva, bouncekvaend;
425
426/*
427 * if this isn't a bounced buffer, then just return
428 */
429	if ((bp->b_flags & B_BOUNCE) == 0)
430		return;
431
432/*
433 *  This check is not necessary
434 *	if (bp->b_bufsize != bp->b_bcount) {
435 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
436 *			bp->b_bufsize, bp->b_bcount);
437 *	}
438 */
439
440	origkva = (vm_offset_t) bp->b_savekva;
441	bouncekva = (vm_offset_t) bp->b_data;
442/*
443	printf("free: %d ", bp->b_bufsize);
444*/
445
446/*
447 * check every page in the kva space for b_addr
448 */
449	for (i = 0; i < bp->b_bufsize; ) {
450		vm_offset_t mybouncepa;
451		vm_offset_t copycount;
452
453		copycount = round_page(bouncekva + 1) - bouncekva;
454		mybouncepa = pmap_kextract(trunc_page(bouncekva));
455
456/*
457 * if this is a bounced pa, then process as one
458 */
459		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
460			vm_offset_t tocopy = copycount;
461			if (i + tocopy > bp->b_bufsize)
462				tocopy = bp->b_bufsize - i;
463/*
464 * if this is a read, then copy from bounce buffer into original buffer
465 */
466			if (bp->b_flags & B_READ)
467				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
468/*
469 * free the bounce allocation
470 */
471
472/*
473			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
474*/
475			vm_bounce_page_free(mybouncepa, 1);
476		}
477
478		origkva += copycount;
479		bouncekva += copycount;
480		i += copycount;
481	}
482
483/*
484	printf("\n");
485*/
486/*
487 * add the old kva into the "to free" list
488 */
489
490	bouncekva= trunc_page((vm_offset_t) bp->b_data);
491	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);
492
493/*
494	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
495*/
496	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
497	bp->b_data = bp->b_savekva;
498	bp->b_savekva = 0;
499	bp->b_flags &= ~B_BOUNCE;
500
501	return;
502}
503
504
505/*
506 * init the bounce buffer system
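 * bouncepages and bouncememory are expected to have been set up earlier in
 * startup; if bouncepages is 0 the bounce mechanism stays disabled.  Every
 * bounce page must be resident and below SIXTEENMEG, or we panic here.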
507 */
508void
509vm_bounce_init()
510{
511	int i;
512
513	kvasfreecnt = 0;
514
515	if (bouncepages == 0)
516		return;
517
518	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
519	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);
520
521	if (!bounceallocarray)
522		panic("Cannot allocate bounce resource array");
523
524	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
525	if (!bouncepa)
526		panic("Cannot allocate physical memory array");
527
528	for(i=0;i<bounceallocarraysize;i++) {
529		bounceallocarray[i] = 0xffffffff;
530	}
531
532	for(i=0;i<bouncepages;i++) {
533		vm_offset_t pa;
534		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG)
535			panic("bounce memory out of range");
536		if( pa == 0)
537			panic("bounce memory not resident");
538		bouncepa[i] = pa;
539		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
540	}
541	bouncefree = bouncepages;
542
543}
544#endif /* BOUNCE_BUFFERS */
545
546/*
547 * quick version of vm_fault
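 * Touch the byte at v with fubyte()/subyte() so that any fault needed to
 * make the page resident -- including the copy-on-write fault for a write
 * access -- is taken now rather than later.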
548 */
549void
550vm_fault_quick(v, prot)
551	caddr_t v;
552	int prot;
553{
554	if (prot & VM_PROT_WRITE)
555		subyte(v, fubyte(v));
556	else
557		fubyte(v);
558}
559
560/*
561 * Finish a fork operation, with process p2 nearly set up.
562 * Copy and update the pcb, set up the stack so that the child is
563 * ready to run and return to user mode.
564 */
565void
566cpu_fork(p1, p2)
567	register struct proc *p1, *p2;
568{
569	struct pcb *pcb2 = &p2->p_addr->u_pcb;
570
571	/* Ensure that p1's pcb is up to date. */
572	if (npxproc == p1)
573		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
574
575	/* Copy p1's pcb. */
576	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
577
578	/*
579	 * Create a new fresh stack for the new process.
580	 * Copy the trap frame for the return to user mode as if from a
581	 * syscall.  This copies the user mode register values.
582	 */
583	p2->p_md.md_regs = (struct trapframe *)
584			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
585	*p2->p_md.md_regs = *p1->p_md.md_regs;
586
587	/*
588	 * Set registers for trampoline to user mode.  Leave space for the
589	 * return address on stack.  These are the kernel mode register values.
590	 */
591	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
592	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
593	pcb2->pcb_esi = (int)fork_return;
594	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
595	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
596	pcb2->pcb_ebx = (int)p2;
597	pcb2->pcb_eip = (int)fork_trampoline;
598	/*
599	 * pcb2->pcb_ldt:	duplicated below, if necessary.
600	 * pcb2->pcb_ldt_len:	cloned above.
601	 * pcb2->pcb_savefpu:	cloned above.
602	 * pcb2->pcb_flags:	cloned above (always 0 here?).
603	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
604	 */
605
606#ifdef VM86
607	/*
608	 * XXX don't copy the i/o pages.  this should probably be fixed.
609	 */
610	pcb2->pcb_ext = 0;
611#endif
612
613#ifdef USER_LDT
614        /* Copy the LDT, if necessary. */
615        if (pcb2->pcb_ldt != 0) {
616                union descriptor *new_ldt;
617                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);
618
619                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
620                bcopy(pcb2->pcb_ldt, new_ldt, len);
621                pcb2->pcb_ldt = (caddr_t)new_ldt;
622        }
623#endif
624
625	/*
626	 * Now, cpu_switch() can schedule the new process.
627	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
628	 * containing the return address when exiting cpu_switch.
629	 * This will normally be to fork_trampoline(), which will have
630	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
631	 * will set up a stack to call fork_return(p, frame); to complete
632	 * the return to user-mode.
633	 */
634}
635
636/*
637 * Intercept the return address from a freshly forked process that has NOT
638 * been scheduled yet.
639 *
640 * This is needed to make kernel threads stay in kernel mode.
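 *
 * It simply overwrites the pcb_esi/pcb_ebx values set up by cpu_fork(), so
 * the fork trampoline ends up calling func(arg, frame) instead of
 * fork_return(p, frame).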
641 */
642void
643cpu_set_fork_handler(p, func, arg)
644	struct proc *p;
645	void (*func) __P((void *));
646	void *arg;
647{
648	/*
649	 * Note that the trap frame follows the args, so the function
650	 * is really called like this:  func(arg, frame);
651	 */
652	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
653	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
654}
655
656void
657cpu_exit(p)
658	register struct proc *p;
659{
660#if defined(USER_LDT) || defined(VM86)
661	struct pcb *pcb = &p->p_addr->u_pcb;
662#endif
663
664#if NNPX > 0
665	npxexit(p);
666#endif	/* NNPX */
667#ifdef VM86
668	if (pcb->pcb_ext != 0) {
669	        /*
670		 * XXX do we need to move the TSS off the allocated pages
671		 * before freeing them?  (not done here)
672		 */
673		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
674		    ctob(IOPAGES + 1));
675		pcb->pcb_ext = 0;
676	}
677#endif
678#ifdef USER_LDT
679	if (pcb->pcb_ldt != 0) {
680		if (pcb == curpcb)
681			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
682		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
683			pcb->pcb_ldt_len * sizeof(union descriptor));
684		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
685	}
686#endif
687	cnt.v_swtch++;
688	cpu_switch(p);
689	panic("cpu_exit");
690}
691
692void
693cpu_wait(p)
694	struct proc *p;
695{
696	/* drop per-process resources */
697	pmap_dispose_proc(p);
698	vmspace_free(p->p_vmspace);
699}
700
701/*
702 * Dump the machine specific header information at the start of a core dump.
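 * Here that is simply the whole u-area: UPAGES pages starting at p->p_addr,
 * written out with vn_rdwr().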
703 */
704int
705cpu_coredump(p, vp, cred)
706	struct proc *p;
707	struct vnode *vp;
708	struct ucred *cred;
709{
710
711	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
712	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
713	    p));
714}
715
716#ifdef notyet
717static void
718setredzone(pte, vaddr)
719	u_short *pte;
720	caddr_t vaddr;
721{
722/* eventually do this by setting up an expand-down stack segment
723   for ss0: selector, allowing stack access down to top of u.
724   this means though that protection violations need to be handled
725   thru a double fault exception that must do an integral task
726   switch to a known good context, within which a dump can be
727   taken. a sensible scheme might be to save the initial context
728   used by sched (that has physical memory mapped 1:1 at bottom)
729   and take the dump while still in mapped mode */
730}
731#endif
732
733/*
734 * Convert kernel VA to physical address
735 */
736u_long
737kvtop(void *addr)
738{
739	vm_offset_t va;
740
741	va = pmap_kextract((vm_offset_t)addr);
742	if (va == 0)
743		panic("kvtop: zero page frame");
744	return((int)va);
745}
746
747/*
748 * Map an IO request into kernel virtual address space.
749 *
750 * All requests are (re)mapped into kernel VA space.
751 * Notice that we use b_bufsize for the size of the buffer
752 * to be mapped.  b_bcount might be modified by the driver.
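 *
 * The caller is expected to have left a kernel va for the mapping in
 * b_saveaddr; on return, b_saveaddr holds the original b_data address and
 * b_data points into the new kernel mapping.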
753 */
754void
755vmapbuf(bp)
756	register struct buf *bp;
757{
758	register caddr_t addr, v, kva;
759	vm_offset_t pa;
760
761	if ((bp->b_flags & B_PHYS) == 0)
762		panic("vmapbuf");
763
764	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
765	    addr < bp->b_data + bp->b_bufsize;
766	    addr += PAGE_SIZE, v += PAGE_SIZE) {
767		/*
768		 * Do the vm_fault if needed; do the copy-on-write thing
769		 * when reading stuff off device into memory.
770		 */
771		vm_fault_quick(addr,
772			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
773		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
774		if (pa == 0)
775			panic("vmapbuf: page not present");
776		vm_page_hold(PHYS_TO_VM_PAGE(pa));
777		pmap_kenter((vm_offset_t) v, pa);
778	}
779
780	kva = bp->b_saveaddr;
781	bp->b_saveaddr = bp->b_data;
782	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
783}
784
785/*
786 * Free the io map PTEs associated with this IO operation.
787 * We also invalidate the TLB entries and restore the original b_addr.
788 */
789void
790vunmapbuf(bp)
791	register struct buf *bp;
792{
793	register caddr_t addr;
794	vm_offset_t pa;
795
796	if ((bp->b_flags & B_PHYS) == 0)
797		panic("vunmapbuf");
798
799	for (addr = (caddr_t)trunc_page(bp->b_data);
800	    addr < bp->b_data + bp->b_bufsize;
801	    addr += PAGE_SIZE) {
802		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
803		pmap_kremove((vm_offset_t) addr);
804		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
805	}
806
807	bp->b_data = bp->b_saveaddr;
808}
809
810/*
811 * Force reset the processor by invalidating the entire address space!
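 * Zeroing the page directory and flushing the TLB leaves the CPU with no
 * valid mappings at all (not even for its fault handlers), so the next
 * memory reference should end in a triple fault and a hardware reset.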
812 */
813void
814cpu_reset() {
815#ifdef PC98
816	/*
817	 * Attempt to do a CPU reset via CPU reset port.
818	 */
819	asm("cli");
820	outb(0x37, 0x0f);		/* SHUT0 = 0. */
821	outb(0x37, 0x0b);		/* SHUT1 = 0. */
822	outb(0xf0, 0x00);		/* Reset. */
823#else
824	/*
825	 * Attempt to do a CPU reset via the keyboard controller,
826	 * do not turn off the GateA20, as any machine that fails
827	 * to do the reset here would then end up in no man's land.
828	 */
829
830#if !defined(BROKEN_KEYBOARD_RESET)
831	outb(IO_KBD + 4, 0xFE);
832	DELAY(500000);	/* wait 0.5 sec to see if that did it */
833	printf("Keyboard reset did not work, attempting CPU shutdown\n");
834	DELAY(1000000);	/* wait 1 sec for printf to complete */
835#endif
836#endif /* PC98 */
837	/* force a shutdown by unmapping entire address space ! */
838	bzero((caddr_t) PTD, PAGE_SIZE);
839
840	/* "good night, sweet prince .... <THUNK!>" */
841	invltlb();
842	/* NOTREACHED */
843	while(1);
844}
845
846/*
847 * Grow the user stack to allow for 'sp'. This version grows the stack in
848 *	chunks of SGROWSIZ.
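 *	Returns 0 if the required stack size would exceed the stack rlimit or
 *	the map could not be extended, and 1 otherwise.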
849 */
850int
851grow(p, sp)
852	struct proc *p;
853	u_int sp;
854{
855	unsigned int nss;
856	caddr_t v;
857	struct vmspace *vm = p->p_vmspace;
858
859	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
860	    return (1);
861
862	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);
863
864	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
865		return (0);
866
867	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
868	    SGROWSIZ) < nss) {
869		int grow_amount;
870		/*
871		 * If necessary, grow the VM that the stack occupies
872		 * to allow for the rlimit. This allows us to not have
873		 * to allocate all of the VM up-front in execve (which
874		 * is expensive).
875		 * Grow the VM by the amount requested rounded up to
876		 * the nearest SGROWSIZ to provide for some hysteresis.
877		 */
878		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
879		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
880		    SGROWSIZ) - grow_amount;
881		/*
882		 * If there isn't enough room to extend by SGROWSIZ, then
883		 * just extend to the maximum size
884		 */
885		if (v < vm->vm_maxsaddr) {
886			v = vm->vm_maxsaddr;
887			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
888		}
889		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
890		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
891			return (0);
892		}
893		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
894	}
895
896	return (1);
897}
898
899/*
900 * Implement the pre-zeroed page mechanism.
901 * This routine is called from the idle loop.
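 * It returns 0 once there are enough pre-zeroed pages; otherwise it pulls a
 * page off a free queue (if one is available), zeroes it with interrupts
 * enabled (dropping the MP lock on SMP while zeroing), moves it onto the
 * matching PQ_ZERO queue, and returns 1.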
902 */
903int
904vm_page_zero_idle()
905{
906	static int free_rover;
907	vm_page_t m;
908	int s;
909
910#ifdef WRONG
911	if (cnt.v_free_count <= cnt.v_interrupt_free_min)
912		return (0);
913#endif
914	/*
915	 * XXX
916	 * We stop zeroing pages when there are sufficient prezeroed pages.
917	 * This threshold isn't really needed, except we want to
918	 * bypass unneeded calls to vm_page_list_find, and the
919	 * associated cache flush and latency.  The pre-zero will
920	 * still be called when there are significantly more
921	 * non-prezeroed pages than zeroed pages.  The threshold
922	 * of half the number of reserved pages is arbitrary, but
923	 * approximately the right amount.  Eventually, we should
924	 * perhaps interrupt the zero operation when a process
925	 * is found to be ready to run.
926	 */
927	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
928		return (0);
929#ifdef SMP
930	get_mplock();
931#endif
932	s = splvm();
933	enable_intr();
934	m = vm_page_list_find(PQ_FREE, free_rover);
935	if (m != NULL) {
936		--(*vm_page_queues[m->queue].lcnt);
937		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
938		splx(s);
939#ifdef SMP
940		rel_mplock();
941#endif
942		pmap_zero_page(VM_PAGE_TO_PHYS(m));
943#ifdef SMP
944		get_mplock();
945#endif
946		(void)splvm();
947		m->queue = PQ_ZERO + m->pc;
948		++(*vm_page_queues[m->queue].lcnt);
949		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
950		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
951		++vm_page_zero_count;
952	}
953	splx(s);
954	disable_intr();
955#ifdef SMP
956	rel_mplock();
957#endif
958	return (1);
959}
960
961/*
962 * Tell whether this address is in some physical memory region.
963 * Currently used by the kernel coredump code in order to avoid
964 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
965 * or other unpredictable behaviour.
966 */
967
968#include "isa.h"
969
970int
971is_physical_memory(addr)
972	vm_offset_t addr;
973{
974
975#if NISA > 0
976	/* The ISA ``memory hole''. */
977	if (addr >= 0xa0000 && addr < 0x100000)
978		return 0;
979#endif
980
981	/*
982	 * stuff other tests for known memory-mapped devices (PCI?)
983	 * here
984	 */
985
986	return 1;
987}
988