vm_machdep.c revision 33307
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.99 1998/02/06 12:13:11 eivind Exp $
 */

#include "npx.h"
#include "opt_bounce.h"
#include "opt_user_ldt.h"
#include "opt_vm86.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

#ifdef PC98
#include <pc98/pc98/pc98.h>
#else
#include <i386/isa/isa.h>
#endif

#ifdef BOUNCE_BUFFERS
static vm_offset_t
		vm_bounce_kva __P((int size, int waitok));
static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
					int now));
static vm_offset_t
		vm_bounce_page_find __P((int count));
static void	vm_bounce_page_free __P((vm_offset_t pa, int count));

static volatile int	kvasfreecnt;

caddr_t		bouncememory;
static int	bpwait;
static vm_offset_t	*bouncepa;
static int		bmwait, bmfreeing;

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
static int		bounceallocarraysize;
static unsigned	*bounceallocarray;
static int		bouncefree;

#if defined(PC98) && defined (EPSON_BOUNCEDMA)
#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
#else
#define SIXTEENMEG (4096*4096)
#endif
#define MAXBKVA 1024
int		maxbkva = MAXBKVA*PAGE_SIZE;

/* special list that can be used at interrupt time for eventual kva free */
static struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
static vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s,i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
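	/*
	 * Scan the allocation bitmap for a clear bit; each bit tracks one
	 * bounce page, and a clear bit means that page is free.
	 */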
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			bit = ffs(~bounceallocarray[i]);
			if (bit) {
				bounceallocarray[i] |= 1 << (bit - 1) ;
				bouncefree -= count;
				splx(s);
				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
			}
		}
	}
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}

static void
vm_bounce_kva_free(addr, size, now)
	vm_offset_t addr;
	vm_offset_t size;
	int now;
{
	int s = splbio();
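	/*
	 * Queue the range on the kvaf[] list; the KVA itself is unmapped
	 * and released later, from vm_bounce_kva().
	 */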
	kvaf[kvasfreecnt].addr = addr;
	kvaf[kvasfreecnt].size = size;
	++kvasfreecnt;
	if( now) {
		/*
		 * this will do wakeups
		 */
		vm_bounce_kva(0,0);
	} else {
		if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
			wakeup((caddr_t) io_map);
			bmwait = 0;
		}
	}
	splx(s);
}

/*
 * free count bounce buffer pages
 */
static void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");

	for(index=0;index<bouncepages;index++) {
		if( pa == bouncepa[index])
			break;
	}

	if( index == bouncepages)
		panic("vm_bounce_page_free: invalid bounce buffer");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}

/*
 * allocate `size' bytes of bounce buffer kva
 */
static vm_offset_t
vm_bounce_kva(size, waitok)
	int size;
	int waitok;
{
	int i;
	vm_offset_t kva = 0;
	vm_offset_t off;
	int s = splbio();
more:
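	/*
	 * First process any deferred frees queued by vm_bounce_kva_free():
	 * unmap the pages and release the KVA back to io_map.
	 */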
	if (!bmfreeing && kvasfreecnt) {
		bmfreeing = 1;
		for (i = 0; i < kvasfreecnt; i++) {
			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
				pmap_kremove( kvaf[i].addr + off);
			}
			kmem_free_wakeup(io_map, kvaf[i].addr,
				kvaf[i].size);
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
		if( bmwait) {
			bmwait = 0;
			wakeup( (caddr_t) io_map);
		}
	}

	if( size == 0) {
		splx(s);
		return 0;
	}

	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
		if( !waitok) {
			splx(s);
			return 0;
		}
		bmwait = 1;
		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);
	return kva;
}

/*
 * same as vm_bounce_kva -- but really allocate backing pages
 * (takes a page count as its argument)
 */
vm_offset_t
vm_bounce_kva_alloc(count)
int count;
{
	int i;
	vm_offset_t kva;
	vm_offset_t pa;
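	/*
	 * Without bounce pages configured there is nothing to remap;
	 * plain kernel malloc memory will do.
	 */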
	if( bouncepages == 0) {
		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
		return kva;
	}
	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
	for(i=0;i<count;i++) {
		pa = vm_bounce_page_find(1);
		pmap_kenter(kva + i * PAGE_SIZE, pa);
	}
	return kva;
}

/*
 * same as vm_bounce_kva_free -- but really free
 */
void
vm_bounce_kva_alloc_free(kva, count)
	vm_offset_t kva;
	int count;
{
	int i;
	vm_offset_t pa;
	if( bouncepages == 0) {
		free((caddr_t) kva, M_TEMP);
		return;
	}
	for(i = 0; i < count; i++) {
		pa = pmap_kextract(kva + i * PAGE_SIZE);
		vm_bounce_page_free(pa, 1);
	}
	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int i;

	if (bouncepages == 0)
		return;

	if (bp->b_flags & B_BOUNCE) {
		printf("vm_bounce_alloc: called recursively???\n");
		return;
	}

	if (bp->b_bufsize < bp->b_bcount) {
		printf(
		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
			bp->b_bufsize, bp->b_bcount);
		panic("vm_bounce_alloc");
	}

/*
 *  This is not really necessary
 *	if( bp->b_bufsize != bp->b_bcount) {
 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
 *	}
 */


	vastart = (vm_offset_t) bp->b_data;
	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;

	vapstart = trunc_page(vastart);
	vapend = round_page(vaend);
	countvmpg = (vapend - vapstart) / PAGE_SIZE;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		if( pa == 0)
			panic("vm_bounce_alloc: Unmapped page");
		va += PAGE_SIZE;
	}
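	/*
	 * dobounceflag counts the pages that lie above the DMA limit;
	 * if none do, the buffer can be used as-is.
	 */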
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
#if 0
	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
		(bp->b_flags & B_READ) ? "read":"write",
			vapstart, vapend, countvmpg, kva);
#endif
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
#if 0
			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
#endif
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0) {
				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
			}
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (PAGE_SIZE * i), pa);
		}
		va += PAGE_SIZE;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_data;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & PAGE_MASK));
#if 0
	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
#endif
	return;
}

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva, bouncekvaend;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

/*
 *  This check is not necessary
 *	if (bp->b_bufsize != bp->b_bcount) {
 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
 *			bp->b_bufsize, bp->b_bcount);
 *	}
 */

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_data;
/*
	printf("free: %d ", bp->b_bufsize);
*/

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < bp->b_bufsize; ) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

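		/*
		 * copycount is the number of bytes from bouncekva to the
		 * end of the page it lies in.
		 */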
		copycount = round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
			vm_offset_t tocopy = copycount;
			if (i + tocopy > bp->b_bufsize)
				tocopy = bp->b_bufsize - i;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
/*
 * free the bounce allocation
 */

/*
			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
*/
			vm_bounce_page_free(mybouncepa, 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		i += copycount;
	}

/*
	printf("\n");
*/
/*
 * add the old kva into the "to free" list
 */

	bouncekva= trunc_page((vm_offset_t) bp->b_data);
	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);

/*
	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
*/
	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
	bp->b_data = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}


/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	int i;

	kvasfreecnt = 0;

	if (bouncepages == 0)
		return;

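	/*
	 * Allocation bitmap: one bit per bounce page, rounded up to whole
	 * unsigned words.
	 */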
	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array");

	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
	if (!bouncepa)
		panic("Cannot allocate physical memory array");

	for(i=0;i<bounceallocarraysize;i++) {
		bounceallocarray[i] = 0xffffffff;
	}

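	/*
	 * Every bit starts out set (allocated).  Record the physical
	 * address of each usable bounce page and clear its bit to mark it
	 * free; if a page turns out to lie above the DMA limit, bouncing
	 * is disabled entirely.
	 */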
	for(i=0;i<bouncepages;i++) {
		vm_offset_t pa;
		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG) {
			printf("vm_bounce_init: bounce memory out of range -- bounce disabled\n");
			free(bounceallocarray, M_TEMP);
			bounceallocarray = NULL;
			free(bouncepa, M_TEMP);
			bouncepa = NULL;
			bouncepages = 0;
			break;
		}
		if( pa == 0)
			panic("bounce memory not resident");
		bouncepa[i] = pa;
		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
	}
	bouncefree = bouncepages;

}
#endif /* BOUNCE_BUFFERS */

/*
 * quick version of vm_fault
 */
void
vm_fault_quick(v, prot)
	caddr_t v;
	int prot;
{
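	/*
	 * Touch the byte with fubyte()/subyte() so that the page is
	 * faulted in; the write-back in the write case also forces any
	 * copy-on-write to be resolved.
	 */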
	if (prot & VM_PROT_WRITE)
		subyte(v, fubyte(v));
	else
		fubyte(v);
}

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	struct pcb *pcb2 = &p2->p_addr->u_pcb;

#if NNPX > 0
	/* Ensure that p1's pcb is up to date. */
	if (npxproc == p1)
		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
#endif

	/* Copy p1's pcb. */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 */
	p2->p_md.md_regs = (struct trapframe *)
			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
	*p2->p_md.md_regs = *p1->p_md.md_regs;

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 */
	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
	pcb2->pcb_esi = (int)fork_return;
	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
	pcb2->pcb_ebx = (int)p2;
	pcb2->pcb_eip = (int)fork_trampoline;
	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_ldt_len:	cloned above.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above (always 0 here?).
	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
	 */

#ifdef VM86
	/*
	 * XXX don't copy the i/o pages.  this should probably be fixed.
	 */
	pcb2->pcb_ext = 0;
#endif

#ifdef USER_LDT
        /* Copy the LDT, if necessary. */
        if (pcb2->pcb_ldt != 0) {
                union descriptor *new_ldt;
                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);

                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
                bcopy(pcb2->pcb_ldt, new_ldt, len);
                pcb2->pcb_ldt = (caddr_t)new_ldt;
        }
#endif

	/*
	 * Now, cpu_switch() can schedule the new process.
	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
	 * will set up a stack to call fork_return(p, frame); to complete
	 * the return to user-mode.
	 */
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(p, func, arg)
	struct proc *p;
	void (*func) __P((void *));
	void *arg;
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
}

void
cpu_exit(p)
	register struct proc *p;
{
#if defined(USER_LDT) || defined(VM86)
	struct pcb *pcb = &p->p_addr->u_pcb;
#endif

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
#ifdef VM86
	if (pcb->pcb_ext != 0) {
	        /*
		 * XXX do we need to move the TSS off the allocated pages
		 * before freeing them?  (not done here)
		 */
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
		    ctob(IOPAGES + 1));
		pcb->pcb_ext = 0;
	}
#endif
#ifdef USER_LDT
	if (pcb->pcb_ldt != 0) {
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
	}
#endif
	cnt.v_swtch++;
	cpu_switch(p);
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
	/* drop per-process resources */
	pmap_dispose_proc(p);

	/* and clean-out the vmspace */
	vmspace_free(p->p_vmspace);
}

/*
 * Dump the machine specific header information at the start of a core dump.
 */
int
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

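	/*
	 * The machine-dependent "header" is simply the process's U-area
	 * (ctob(UPAGES) bytes starting at p_addr), written at offset 0.
	 */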
	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}

#ifdef notyet
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}
#endif

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_kextract((vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((int)va);
}

/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr, v, kva;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE, v += PAGE_SIZE) {
		/*
		 * Do the vm_fault if needed; do the copy-on-write thing
		 * when reading stuff off device into memory.
		 */
		vm_fault_quick(addr,
			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		if (pa == 0)
			panic("vmapbuf: page not present");
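		/*
		 * Hold the physical page so it cannot be reclaimed while
		 * the I/O is in progress; vunmapbuf() drops the hold again.
		 */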
		vm_page_hold(PHYS_TO_VM_PAGE(pa));
		pmap_kenter((vm_offset_t) v, pa);
	}

	kva = bp->b_saveaddr;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	for (addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE) {
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		pmap_kremove((vm_offset_t) addr);
		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
	}

	bp->b_data = bp->b_saveaddr;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset()
{

#ifdef PC98
	/*
	 * Attempt to do a CPU reset via CPU reset port.
	 */
	disable_intr();
	outb(0x37, 0x0f);		/* SHUT0 = 0. */
	outb(0x37, 0x0b);		/* SHUT1 = 0. */
	outb(0xf0, 0x00);		/* Reset. */
#else
	/*
	 * Attempt to do a CPU reset via the keyboard controller;
	 * do not turn off GateA20, as any machine that fails
	 * to do the reset here would then end up in no man's land.
	 */

#if !defined(BROKEN_KEYBOARD_RESET)
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	printf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for printf to complete */
#endif
#endif /* PC98 */
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, PAGE_SIZE);

	/* "good night, sweet prince .... <THUNK!>" */
	invltlb();
	/* NOTREACHED */
	while(1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	u_int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

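	/*
	 * Addresses outside the stack segment are not ours to grow;
	 * return success and leave them to the normal fault handling.
	 */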
	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}

/*
 * Implement the pre-zeroed page mechanism.
 * This routine is called from the idle loop.
 */
int
vm_page_zero_idle()
{
	static int free_rover;
	vm_page_t m;
	int s;

#ifdef WRONG
	if (cnt.v_free_count <= cnt.v_interrupt_free_min)
		return (0);
#endif
	/*
	 * XXX
	 * We stop zeroing pages when there are sufficient prezeroed pages.
	 * This threshold isn't really needed, except we want to
	 * bypass unneeded calls to vm_page_list_find, and the
	 * associated cache flush and latency.  The pre-zero will
	 * still be called when there are significantly more
	 * non-prezeroed pages than zeroed pages.  The threshold
	 * of half the number of reserved pages is arbitrary, but
	 * approximately the right amount.  Eventually, we should
	 * perhaps interrupt the zero operation when a process
	 * is found to be ready to run.
	 */
	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
		return (0);
#ifdef SMP
	get_mplock();
#endif
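	/*
	 * Block VM activity with splvm(), then allow hardware interrupts
	 * while we search for and zero a page; they are disabled again
	 * below, before returning to the idle loop.
	 */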
	s = splvm();
	enable_intr();
	m = vm_page_list_find(PQ_FREE, free_rover);
	if (m != NULL) {
		--(*vm_page_queues[m->queue].lcnt);
		TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
		m->queue = PQ_NONE;
		splx(s);
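		/*
		 * Zero the page without holding the MP lock, so other
		 * CPUs can get into the kernel in the meantime.
		 */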
#ifdef SMP
		rel_mplock();
#endif
		pmap_zero_page(VM_PAGE_TO_PHYS(m));
#ifdef SMP
		get_mplock();
#endif
		(void)splvm();
		m->queue = PQ_ZERO + m->pc;
		++(*vm_page_queues[m->queue].lcnt);
		TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
		free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
		++vm_page_zero_count;
	}
	splx(s);
	disable_intr();
#ifdef SMP
	rel_mplock();
#endif
	return (1);
}

978
979/*
980 * Software interrupt handler for queued VM system processing.
981 */
982void
983swi_vm()
984{
985	if (busdma_swi_pending != 0)
986		busdma_swi();
987}
988
989/*
990 * Tell whether this address is in some physical memory region.
991 * Currently used by the kernel coredump code in order to avoid
992 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
993 * or other unpredictable behaviour.
994 */
995
996#include "isa.h"
997
998int
999is_physical_memory(addr)
1000	vm_offset_t addr;
1001{
1002
1003#if NISA > 0
1004	/* The ISA ``memory hole''. */
1005	if (addr >= 0xa0000 && addr < 0x100000)
1006		return 0;
1007#endif
1008
1009	/*
1010	 * stuff other tests for known memory-mapped devices (PCI?)
1011	 * here
1012	 */
1013
1014	return 1;
1015}
1016