vm_machdep.c revision 33108
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
40 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41 *	$Id: vm_machdep.c,v 1.97 1998/01/30 10:26:26 dyson Exp $
42 */
43
44#include "npx.h"
45#include "opt_bounce.h"
46#include "opt_diagnostic.h"
47#include "opt_user_ldt.h"
48#include "opt_vm86.h"
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/proc.h>
53#include <sys/malloc.h>
54#include <sys/buf.h>
55#include <sys/vnode.h>
56#include <sys/vmmeter.h>
57
58#include <machine/clock.h>
59#include <machine/cpu.h>
60#include <machine/md_var.h>
61#ifdef SMP
62#include <machine/smp.h>
63#endif
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/vm_prot.h>
68#include <sys/lock.h>
69#include <vm/vm_kern.h>
70#include <vm/vm_page.h>
71#include <vm/vm_map.h>
72#include <vm/vm_extern.h>
73
74#include <sys/user.h>
75
76#ifdef PC98
77#include <pc98/pc98/pc98.h>
78#else
79#include <i386/isa/isa.h>
80#endif
81
82#ifdef BOUNCE_BUFFERS
83static vm_offset_t
84		vm_bounce_kva __P((int size, int waitok));
85static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
86					int now));
87static vm_offset_t
88		vm_bounce_page_find __P((int count));
89static void	vm_bounce_page_free __P((vm_offset_t pa, int count));
90
91static volatile int	kvasfreecnt;
92
93caddr_t		bouncememory;
94static int	bpwait;
95static vm_offset_t	*bouncepa;
96static int		bmwait, bmfreeing;
97
98#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
99static int		bounceallocarraysize;
100static unsigned	*bounceallocarray;
101static int		bouncefree;
102
103#if defined(PC98) && defined (EPSON_BOUNCEDMA)
104#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
105#else
106#define SIXTEENMEG (4096*4096)
107#endif
108#define MAXBKVA 1024
109int		maxbkva = MAXBKVA*PAGE_SIZE;
110
111/* special list that can be used at interrupt time for eventual kva free */
112static struct kvasfree {
113	vm_offset_t addr;
114	vm_offset_t size;
115} kvaf[MAXBKVA];
116
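/*
 * Bookkeeping note (summarizing the code below): bounceallocarray[] is a
 * bitmap with one bit per page in bouncepa[]; a set bit means the page is
 * in use.  Page index i maps to word i / BITS_IN_UNSIGNED and bit
 * i % BITS_IN_UNSIGNED, so with 32-bit words page 37 lives in
 * bounceallocarray[1], bit 5.  vm_bounce_page_find() scans for a clear
 * bit and sets it; vm_bounce_page_free() clears it again.
 */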
117/*
118 * get bounce buffer pages (count physically contiguous)
119 * (only 1 implemented now)
120 */
121static vm_offset_t
122vm_bounce_page_find(count)
123	int count;
124{
125	int bit;
126	int s,i;
127
128	if (count != 1)
129		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");
130
131	s = splbio();
132retry:
133	for (i = 0; i < bounceallocarraysize; i++) {
134		if (bounceallocarray[i] != 0xffffffff) {
135			bit = ffs(~bounceallocarray[i]);
136			if (bit) {
137				bounceallocarray[i] |= 1 << (bit - 1) ;
138				bouncefree -= count;
139				splx(s);
140				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
141			}
142		}
143	}
144	bpwait = 1;
145	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
146	goto retry;
147}
148
149static void
150vm_bounce_kva_free(addr, size, now)
151	vm_offset_t addr;
152	vm_offset_t size;
153	int now;
154{
155	int s = splbio();
156	kvaf[kvasfreecnt].addr = addr;
157	kvaf[kvasfreecnt].size = size;
158	++kvasfreecnt;
159	if( now) {
160		/*
161		 * this will do wakeups
162		 */
163		vm_bounce_kva(0,0);
164	} else {
165		if (bmwait) {
166		/*
167		 * if anyone is waiting on the bounce-map, then wakeup
168		 */
169			wakeup((caddr_t) io_map);
170			bmwait = 0;
171		}
172	}
173	splx(s);
174}
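/*
 * Note on the deferred-free protocol above: vm_bounce_kva_free() may be
 * called from interrupt context, so it only queues the range in kvaf[]
 * at splbio.  The queue is drained by vm_bounce_kva() below; calling it
 * with a size of 0, e.g.
 *
 *	vm_bounce_kva(0, 0);	(flush pending kva frees, never blocks)
 *
 * just flushes pending frees and does the wakeups, which is exactly what
 * the "now" case above relies on.
 */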
175
176/*
177 * free count bounce buffer pages
178 */
179static void
180vm_bounce_page_free(pa, count)
181	vm_offset_t pa;
182	int count;
183{
184	int allocindex;
185	int index;
186	int bit;
187
188	if (count != 1)
189		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");
190
191	for(index=0;index<bouncepages;index++) {
192		if( pa == bouncepa[index])
193			break;
194	}
195
196	if( index == bouncepages)
197		panic("vm_bounce_page_free: invalid bounce buffer");
198
199	allocindex = index / BITS_IN_UNSIGNED;
200	bit = index % BITS_IN_UNSIGNED;
201
202	bounceallocarray[allocindex] &= ~(1 << bit);
203
204	bouncefree += count;
205	if (bpwait) {
206		bpwait = 0;
207		wakeup((caddr_t) &bounceallocarray);
208	}
209}
210
211/*
212 * allocate count bounce buffer kva pages
213 */
214static vm_offset_t
215vm_bounce_kva(size, waitok)
216	int size;
217	int waitok;
218{
219	int i;
220	vm_offset_t kva = 0;
221	vm_offset_t off;
222	int s = splbio();
223more:
224	if (!bmfreeing && kvasfreecnt) {
225		bmfreeing = 1;
226		for (i = 0; i < kvasfreecnt; i++) {
227			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
228				pmap_kremove( kvaf[i].addr + off);
229			}
230			kmem_free_wakeup(io_map, kvaf[i].addr,
231				kvaf[i].size);
232		}
233		kvasfreecnt = 0;
234		bmfreeing = 0;
235		if( bmwait) {
236			bmwait = 0;
237			wakeup( (caddr_t) io_map);
238		}
239	}
240
241	if( size == 0) {
242		splx(s);
243		return 0;
244	}
245
246	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
247		if( !waitok) {
248			splx(s);
249			return 0;
250		}
251		bmwait = 1;
252		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
253		goto more;
254	}
255	splx(s);
256	return kva;
257}
258
259/*
260 * same as vm_bounce_kva -- but really allocates pages (takes a page count as arg)
261 */
262vm_offset_t
263vm_bounce_kva_alloc(count)
264int count;
265{
266	int i;
267	vm_offset_t kva;
268	vm_offset_t pa;
269	if( bouncepages == 0) {
270		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
271		return kva;
272	}
273	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
274	for(i=0;i<count;i++) {
275		pa = vm_bounce_page_find(1);
276		pmap_kenter(kva + i * PAGE_SIZE, pa);
277	}
278	return kva;
279}
280
281/*
282 * same as vm_bounce_kva_free -- but really free
283 */
284void
285vm_bounce_kva_alloc_free(kva, count)
286	vm_offset_t kva;
287	int count;
288{
289	int i;
290	vm_offset_t pa;
291	if( bouncepages == 0) {
292		free((caddr_t) kva, M_TEMP);
293		return;
294	}
295	for(i = 0; i < count; i++) {
296		pa = pmap_kextract(kva + i * PAGE_SIZE);
297		vm_bounce_page_free(pa, 1);
298	}
299	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
300}
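/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * a driver needing a DMA-safe staging area below 16MB could pair the two
 * routines above like this:
 *
 *	vm_offset_t sva = vm_bounce_kva_alloc(npages);
 *	... stage the transfer through sva ...
 *	vm_bounce_kva_alloc_free(sva, npages);
 *
 * When bouncepages is 0 both calls quietly degrade to malloc()/free(),
 * so callers need not check whether bouncing is configured.
 */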
301
302/*
303 * do the things necessary to the struct buf to implement
304 * bounce buffers...  inserted before the disk sort
305 */
306void
307vm_bounce_alloc(bp)
308	struct buf *bp;
309{
310	int countvmpg;
311	vm_offset_t vastart, vaend;
312	vm_offset_t vapstart, vapend;
313	vm_offset_t va, kva;
314	vm_offset_t pa;
315	int dobounceflag = 0;
316	int i;
317
318	if (bouncepages == 0)
319		return;
320
321	if (bp->b_flags & B_BOUNCE) {
322		printf("vm_bounce_alloc: called recursively???\n");
323		return;
324	}
325
326	if (bp->b_bufsize < bp->b_bcount) {
327		printf(
328		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
329			bp->b_bufsize, bp->b_bcount);
330		panic("vm_bounce_alloc");
331	}
332
333/*
334 *  This is not really necessary
335 *	if( bp->b_bufsize != bp->b_bcount) {
336 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
337 *	}
338 */
339
340
341	vastart = (vm_offset_t) bp->b_data;
342	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;
343
344	vapstart = trunc_page(vastart);
345	vapend = round_page(vaend);
346	countvmpg = (vapend - vapstart) / PAGE_SIZE;
347
348/*
349 * if any page is above 16MB, then go into bounce-buffer mode
350 */
351	va = vapstart;
352	for (i = 0; i < countvmpg; i++) {
353		pa = pmap_kextract(va);
354		if (pa >= SIXTEENMEG)
355			++dobounceflag;
356		if( pa == 0)
357			panic("vm_bounce_alloc: Unmapped page");
358		va += PAGE_SIZE;
359	}
360	if (dobounceflag == 0)
361		return;
362
363	if (bouncepages < dobounceflag)
364		panic("Not enough bounce buffers!!!");
365
366/*
367 * allocate a replacement kva for b_addr
368 */
369	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
370#if 0
371	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
372		(bp->b_flags & B_READ) ? "read":"write",
373			vapstart, vapend, countvmpg, kva);
374#endif
375	va = vapstart;
376	for (i = 0; i < countvmpg; i++) {
377		pa = pmap_kextract(va);
378		if (pa >= SIXTEENMEG) {
379			/*
380			 * allocate a replacement page
381			 */
382			vm_offset_t bpa = vm_bounce_page_find(1);
383			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
384#if 0
385			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
386#endif
387			/*
388			 * if we are writing, then copy the data into the page
389			 */
390			if ((bp->b_flags & B_READ) == 0) {
391				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
392			}
393		} else {
394			/*
395			 * use original page
396			 */
397			pmap_kenter(kva + (PAGE_SIZE * i), pa);
398		}
399		va += PAGE_SIZE;
400	}
401
402/*
403 * flag the buffer as being bounced
404 */
405	bp->b_flags |= B_BOUNCE;
406/*
407 * save the original buffer kva
408 */
409	bp->b_savekva = bp->b_data;
410/*
411 * put our new kva into the buffer (offset by original offset)
412 */
413	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
414				((vm_offset_t) bp->b_savekva & PAGE_MASK));
415#if 0
416	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
417#endif
418	return;
419}
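/*
 * Note on the address arithmetic above: the replacement b_data keeps the
 * original sub-page offset by OR-ing (b_savekva & PAGE_MASK) into the new
 * kva, so byte counts and offsets seen by the driver are unchanged.
 * vm_bounce_free() below reverses the whole operation at biodone time.
 */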
420
421/*
422 * hook into biodone to free bounce buffer
423 */
424void
425vm_bounce_free(bp)
426	struct buf *bp;
427{
428	int i;
429	vm_offset_t origkva, bouncekva, bouncekvaend;
430
431/*
432 * if this isn't a bounced buffer, then just return
433 */
434	if ((bp->b_flags & B_BOUNCE) == 0)
435		return;
436
437/*
438 *  This check is not necessary
439 *	if (bp->b_bufsize != bp->b_bcount) {
440 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
441 *			bp->b_bufsize, bp->b_bcount);
442 *	}
443 */
444
445	origkva = (vm_offset_t) bp->b_savekva;
446	bouncekva = (vm_offset_t) bp->b_data;
447/*
448	printf("free: %d ", bp->b_bufsize);
449*/
450
451/*
452 * check every page in the kva space for b_addr
453 */
454	for (i = 0; i < bp->b_bufsize; ) {
455		vm_offset_t mybouncepa;
456		vm_offset_t copycount;
457
458		copycount = round_page(bouncekva + 1) - bouncekva;
459		mybouncepa = pmap_kextract(trunc_page(bouncekva));
460
461/*
462 * if this is a bounced pa, then process as one
463 */
464		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
465			vm_offset_t tocopy = copycount;
466			if (i + tocopy > bp->b_bufsize)
467				tocopy = bp->b_bufsize - i;
468/*
469 * if this is a read, then copy from bounce buffer into original buffer
470 */
471			if (bp->b_flags & B_READ)
472				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
473/*
474 * free the bounce allocation
475 */
476
477/*
478			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
479*/
480			vm_bounce_page_free(mybouncepa, 1);
481		}
482
483		origkva += copycount;
484		bouncekva += copycount;
485		i += copycount;
486	}
487
488/*
489	printf("\n");
490*/
491/*
492 * add the old kva into the "to free" list
493 */
494
495	bouncekva= trunc_page((vm_offset_t) bp->b_data);
496	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);
497
498/*
499	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
500*/
501	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
502	bp->b_data = bp->b_savekva;
503	bp->b_savekva = 0;
504	bp->b_flags &= ~B_BOUNCE;
505
506	return;
507}
508
509
510/*
511 * init the bounce buffer system
512 */
513void
514vm_bounce_init()
515{
516	int i;
517
518	kvasfreecnt = 0;
519
520	if (bouncepages == 0)
521		return;
522
523	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
524	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);
525
526	if (!bounceallocarray)
527		panic("Cannot allocate bounce resource array");
528
529	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
530	if (!bouncepa)
531		panic("Cannot allocate physical memory array");
532
533	for(i=0;i<bounceallocarraysize;i++) {
534		bounceallocarray[i] = 0xffffffff;
535	}
536
537	for(i=0;i<bouncepages;i++) {
538		vm_offset_t pa;
539		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG) {
540			printf("vm_bounce_init: bounce memory out of range -- bounce disabled\n");
541			free(bounceallocarray, M_TEMP);
542			bounceallocarray = NULL;
543			free(bouncepa, M_TEMP);
544			bouncepa = NULL;
545			bouncepages = 0;
546			break;
547		}
548		if( pa == 0)
549			panic("bounce memory not resident");
550		bouncepa[i] = pa;
551		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
552	}
553	bouncefree = bouncepages;
554
555}
556#endif /* BOUNCE_BUFFERS */
557
558/*
559 * quick version of vm_fault
560 */
561void
562vm_fault_quick(v, prot)
563	caddr_t v;
564	int prot;
565{
566	if (prot & VM_PROT_WRITE)
567		subyte(v, fubyte(v));
568	else
569		fubyte(v);
570}
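/*
 * The fubyte()/subyte() pair above simply touches the byte at v from
 * kernel mode: the access triggers whatever fault is needed (including
 * the copy-on-write fault in the write case, since the same byte is
 * written back), which is cheaper than calling vm_fault() directly.
 */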
571
572/*
573 * Finish a fork operation, with process p2 nearly set up.
574 * Copy and update the pcb and set up the stack so that the child is
575 * ready to run and return to user mode.
576 */
577void
578cpu_fork(p1, p2)
579	register struct proc *p1, *p2;
580{
581	struct pcb *pcb2 = &p2->p_addr->u_pcb;
582
583	/* Ensure that p1's pcb is up to date. */
584	if (npxproc == p1)
585		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
586
587	/* Copy p1's pcb. */
588	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
589
590	/*
591	 * Create a new fresh stack for the new process.
592	 * Copy the trap frame for the return to user mode as if from a
593	 * syscall.  This copies the user mode register values.
594	 */
595	p2->p_md.md_regs = (struct trapframe *)
596			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
597	*p2->p_md.md_regs = *p1->p_md.md_regs;
598
599	/*
600	 * Set registers for trampoline to user mode.  Leave space for the
601	 * return address on stack.  These are the kernel mode register values.
602	 */
603	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
604	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
605	pcb2->pcb_esi = (int)fork_return;
606	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
607	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
608	pcb2->pcb_ebx = (int)p2;
609	pcb2->pcb_eip = (int)fork_trampoline;
610	/*
611	 * pcb2->pcb_ldt:	duplicated below, if necessary.
612	 * pcb2->pcb_ldt_len:	cloned above.
613	 * pcb2->pcb_savefpu:	cloned above.
614	 * pcb2->pcb_flags:	cloned above (always 0 here?).
615	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
616	 */
617
618#ifdef VM86
619	/*
620	 * XXX don't copy the i/o pages.  this should probably be fixed.
621	 */
622	pcb2->pcb_ext = 0;
623#endif
624
625#ifdef USER_LDT
626        /* Copy the LDT, if necessary. */
627        if (pcb2->pcb_ldt != 0) {
628                union descriptor *new_ldt;
629                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);
630
631                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
632                bcopy(pcb2->pcb_ldt, new_ldt, len);
633                pcb2->pcb_ldt = (caddr_t)new_ldt;
634        }
635#endif
636
637	/*
638	 * Now, cpu_switch() can schedule the new process.
639	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
640	 * containing the return address when exiting cpu_switch.
641	 * This will normally be to fork_trampoline(), which will have
642	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
643	 * will set up a stack to call fork_return(p, frame); to complete
644	 * the return to user-mode.
645	 */
646}
647
648/*
649 * Intercept the return address from a freshly forked process that has NOT
650 * been scheduled yet.
651 *
652 * This is needed to make kernel threads stay in kernel mode.
653 */
654void
655cpu_set_fork_handler(p, func, arg)
656	struct proc *p;
657	void (*func) __P((void *));
658	void *arg;
659{
660	/*
661	 * Note that the trap frame follows the args, so the function
662	 * is really called like this:  func(arg, frame);
663	 */
664	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
665	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
666}
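/*
 * Illustrative sketch (hypothetical caller, not part of this file): once
 * fork1() has created a child p2 that has not yet been scheduled, a
 * kernel-thread creator could do
 *
 *	cpu_set_fork_handler(p2, kthread_body, NULL);
 *
 * after which the child starts life in kthread_body(NULL, frame) in
 * kernel mode and never returns to user mode.
 */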
667
668void
669cpu_exit(p)
670	register struct proc *p;
671{
672#if defined(USER_LDT) || defined(VM86)
673	struct pcb *pcb = &p->p_addr->u_pcb;
674#endif
675
676#if NNPX > 0
677	npxexit(p);
678#endif	/* NNPX */
679#ifdef VM86
680	if (pcb->pcb_ext != 0) {
681	        /*
682		 * XXX do we need to move the TSS off the allocated pages
683		 * before freeing them?  (not done here)
684		 */
685		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
686		    ctob(IOPAGES + 1));
687		pcb->pcb_ext = 0;
688	}
689#endif
690#ifdef USER_LDT
691	if (pcb->pcb_ldt != 0) {
692		if (pcb == curpcb)
693			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
694		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
695			pcb->pcb_ldt_len * sizeof(union descriptor));
696		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
697	}
698#endif
699	cnt.v_swtch++;
700	cpu_switch(p);
701	panic("cpu_exit");
702}
703
704void
705cpu_wait(p)
706	struct proc *p;
707{
708	/* drop per-process resources */
709	pmap_dispose_proc(p);
710
711	/* and clean-out the vmspace */
712	vmspace_free(p->p_vmspace);
713}
714
715/*
716 * Dump the machine specific header information at the start of a core dump.
717 */
718int
719cpu_coredump(p, vp, cred)
720	struct proc *p;
721	struct vnode *vp;
722	struct ucred *cred;
723{
724
725	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
726	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
727	    p));
728}
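/*
 * The single vn_rdwr() above writes the whole u-area (ctob(UPAGES) bytes
 * starting at p->p_addr) to the front of the core file; the
 * machine-independent coredump code writes the data and stack segments
 * separately.
 */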
729
730#ifdef notyet
731static void
732setredzone(pte, vaddr)
733	u_short *pte;
734	caddr_t vaddr;
735{
736/* eventually do this by setting up an expand-down stack segment
737   for ss0: selector, allowing stack access down to top of u.
738   this means though that protection violations need to be handled
739   thru a double fault exception that must do an integral task
740   switch to a known good context, within which a dump can be
741   taken. a sensible scheme might be to save the initial context
742   used by sched (that has physical memory mapped 1:1 at bottom)
743   and take the dump while still in mapped mode */
744}
745#endif
746
747/*
748 * Convert kernel VA to physical address
749 */
750u_long
751kvtop(void *addr)
752{
753	vm_offset_t va;
754
755	va = pmap_kextract((vm_offset_t)addr);
756	if (va == 0)
757		panic("kvtop: zero page frame");
758	return((int)va);
759}
760
761/*
762 * Map an IO request into kernel virtual address space.
763 *
764 * All requests are (re)mapped into kernel VA space.
765 * Notice that we use b_bufsize for the size of the buffer
766 * to be mapped.  b_bcount might be modified by the driver.
767 */
768void
769vmapbuf(bp)
770	register struct buf *bp;
771{
772	register caddr_t addr, v, kva;
773	vm_offset_t pa;
774
775	if ((bp->b_flags & B_PHYS) == 0)
776		panic("vmapbuf");
777
778	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
779	    addr < bp->b_data + bp->b_bufsize;
780	    addr += PAGE_SIZE, v += PAGE_SIZE) {
781		/*
782		 * Do the vm_fault if needed; do the copy-on-write thing
783		 * when reading stuff off device into memory.
784		 */
785		vm_fault_quick(addr,
786			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
787		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
788		if (pa == 0)
789			panic("vmapbuf: page not present");
790		vm_page_hold(PHYS_TO_VM_PAGE(pa));
791		pmap_kenter((vm_offset_t) v, pa);
792	}
793
794	kva = bp->b_saveaddr;
795	bp->b_saveaddr = bp->b_data;
796	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
797}
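/*
 * vmapbuf() and vunmapbuf() below are intended to be paired by
 * physio()-style callers: the user pages backing b_data are faulted in,
 * held, and remapped into the kernel kva supplied in b_saveaddr, after
 * which b_data and b_saveaddr are swapped.  An illustrative sequence:
 *
 *	vmapbuf(bp);
 *	(*strategy)(bp);
 *	biowait(bp);
 *	vunmapbuf(bp);
 */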
798
799/*
800 * Free the io map PTEs associated with this IO operation.
801 * We also invalidate the TLB entries and restore the original b_addr.
802 */
803void
804vunmapbuf(bp)
805	register struct buf *bp;
806{
807	register caddr_t addr;
808	vm_offset_t pa;
809
810	if ((bp->b_flags & B_PHYS) == 0)
811		panic("vunmapbuf");
812
813	for (addr = (caddr_t)trunc_page(bp->b_data);
814	    addr < bp->b_data + bp->b_bufsize;
815	    addr += PAGE_SIZE) {
816		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
817		pmap_kremove((vm_offset_t) addr);
818		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
819	}
820
821	bp->b_data = bp->b_saveaddr;
822}
823
824/*
825 * Force reset the processor by invalidating the entire address space!
826 */
827void
828cpu_reset()
829{
830
831#ifdef PC98
832	/*
833	 * Attempt to do a CPU reset via CPU reset port.
834	 */
835	disable_intr();
836	outb(0x37, 0x0f);		/* SHUT0 = 0. */
837	outb(0x37, 0x0b);		/* SHUT1 = 0. */
838	outb(0xf0, 0x00);		/* Reset. */
839#else
840	/*
841	 * Attempt to do a CPU reset via the keyboard controller.  We do
842	 * not turn off GateA20, as any machine that fails to do the reset
843	 * here would then end up in no man's land.
844	 */
845
846#if !defined(BROKEN_KEYBOARD_RESET)
847	outb(IO_KBD + 4, 0xFE);
848	DELAY(500000);	/* wait 0.5 sec to see if that did it */
849	printf("Keyboard reset did not work, attempting CPU shutdown\n");
850	DELAY(1000000);	/* wait 1 sec for printf to complete */
851#endif
852#endif /* PC98 */
853	/* force a shutdown by unmapping entire address space ! */
854	bzero((caddr_t) PTD, PAGE_SIZE);
855
856	/* "good night, sweet prince .... <THUNK!>" */
857	invltlb();
858	/* NOTREACHED */
859	while(1);
860}
861
862/*
863 * Grow the user stack to allow for 'sp'. This version grows the stack in
864 *	chunks of SGROWSIZ.
865 */
866int
867grow(p, sp)
868	struct proc *p;
869	u_int sp;
870{
871	unsigned int nss;
872	caddr_t v;
873	struct vmspace *vm = p->p_vmspace;
874
875	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
876	    return (1);
877
878	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);
879
880	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
881		return (0);
882
883	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
884	    SGROWSIZ) < nss) {
885		int grow_amount;
886		/*
887		 * If necessary, grow the VM that the stack occupies
888		 * to allow for the rlimit. This allows us to not have
889		 * to allocate all of the VM up-front in execve (which
890		 * is expensive).
891		 * Grow the VM by the amount requested rounded up to
892		 * the nearest SGROWSIZ to provide for some hysteresis.
893		 */
894		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
895		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
896		    SGROWSIZ) - grow_amount;
897		/*
898		 * If there isn't enough room to extend by SGROWSIZ, then
899		 * just extend to the maximum size
900		 */
901		if (v < vm->vm_maxsaddr) {
902			v = vm->vm_maxsaddr;
903			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
904		}
905		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
906		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
907			return (0);
908		}
909		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
910	}
911
912	return (1);
913}
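/*
 * Worked example (illustrative, assuming PAGE_SIZE is 4K and SGROWSIZ is
 * 128K): a fault at USRSTACK - 130K gives nss = 132K.  If the stack so
 * far covers only 128K, grow_amount = roundup(132K - 128K, 128K) = 128K,
 * so the map is extended by a full 128K chunk rather than a single page;
 * that is the hysteresis the comment above refers to.
 */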
914
915/*
916 * Implement the pre-zeroed page mechanism.
917 * This routine is called from the idle loop.
918 */
919int
920vm_page_zero_idle()
921{
922	static int free_rover;
923	vm_page_t m;
924	int s;
925
926#ifdef WRONG
927	if (cnt.v_free_count <= cnt.v_interrupt_free_min)
928		return (0);
929#endif
930	/*
931	 * XXX
932	 * We stop zeroing pages when there are sufficient prezeroed pages.
933	 * This threshold isn't really needed, except we want to
934	 * bypass unneeded calls to vm_page_list_find, and the
935	 * associated cache flush and latency.  The pre-zero will
936	 * still be called when there are significantly more
937	 * non-prezeroed pages than zeroed pages.  The threshold
938	 * of half the number of reserved pages is arbitrary, but
939	 * approximately the right amount.  Eventually, we should
940	 * perhaps interrupt the zero operation when a process
941	 * is found to be ready to run.
942	 */
943	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
944		return (0);
945#ifdef SMP
946	get_mplock();
947#endif
948	s = splvm();
949	enable_intr();
950	m = vm_page_list_find(PQ_FREE, free_rover);
951	if (m != NULL) {
952		--(*vm_page_queues[m->queue].lcnt);
953		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
954		m->queue = PQ_NONE;
955		splx(s);
956#ifdef SMP
957		rel_mplock();
958#endif
959		pmap_zero_page(VM_PAGE_TO_PHYS(m));
960#ifdef SMP
961		get_mplock();
962#endif
963		(void)splvm();
964		m->queue = PQ_ZERO + m->pc;
965		++(*vm_page_queues[m->queue].lcnt);
966		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
967		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
968		++vm_page_zero_count;
969	}
970	splx(s);
971	disable_intr();
972#ifdef SMP
973	rel_mplock();
974#endif
975	return (1);
976}
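/*
 * Note on the rover above: free_rover walks the PQ_FREE queues in steps
 * of PQ_PRIME3 modulo PQ_L2_MASK, so successive idle-loop calls zero
 * pages of different cache colors instead of hammering one queue.  The
 * splvm/enable_intr dance lets pmap_zero_page() itself run with
 * interrupts enabled, since the page has already been taken off the
 * free queue.
 */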
977
978/*
979 * Software interrupt handler for queued VM system processing.
980 */
981void
982swi_vm()
983{
984	if (busdma_swi_pending != 0)
985		busdma_swi();
986}
987
988/*
989 * Tell whether this address is in some physical memory region.
990 * Currently used by the kernel coredump code in order to avoid
991 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
992 * or other unpredictable behaviour.
993 */
994
995#include "isa.h"
996
997int
998is_physical_memory(addr)
999	vm_offset_t addr;
1000{
1001
1002#if NISA > 0
1003	/* The ISA ``memory hole''. */
1004	if (addr >= 0xa0000 && addr < 0x100000)
1005		return 0;
1006#endif
1007
1008	/*
1009	 * stuff other tests for known memory-mapped devices (PCI?)
1010	 * here
1011	 */
1012
1013	return 1;
1014}
1015