vm_machdep.c revision 34643
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *	$Id: vm_machdep.c,v 1.103 1998/03/14 03:02:15 tegge Exp $
 */

#include "npx.h"
#include "opt_bounce.h"
#include "opt_user_ldt.h"
#include "opt_vm86.h"
#ifdef PC98
#include "opt_pc98.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

#ifdef PC98
#include <pc98/pc98/pc98.h>
#else
#include <i386/isa/isa.h>
#endif

#ifdef BOUNCE_BUFFERS
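/*
 * ISA DMA can only address the low 16MB of physical memory, so I/O
 * buffers that live above that boundary are "bounced" through pages
 * allocated below it.  The routines in this section manage those low
 * pages and the kernel VA used to map them.
 */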
static vm_offset_t
		vm_bounce_kva __P((int size, int waitok));
static void	vm_bounce_kva_free __P((vm_offset_t addr, vm_offset_t size,
					int now));
static vm_offset_t
		vm_bounce_page_find __P((int count));
static void	vm_bounce_page_free __P((vm_offset_t pa, int count));

static volatile int	kvasfreecnt;

caddr_t		bouncememory;
static int	bpwait;
static vm_offset_t	*bouncepa;
static int		bmwait, bmfreeing;
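/*
 * bpwait means a thread is sleeping for a free bounce page, bmwait means
 * a thread is sleeping for bounce KVA in io_map, and bmfreeing guards
 * against re-entering the deferred KVA free loop in vm_bounce_kva().
 */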

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
static int		bounceallocarraysize;
static unsigned	*bounceallocarray;
static int		bouncefree;

#if defined(PC98) && defined (EPSON_BOUNCEDMA)
#define SIXTEENMEG (3840*4096)			/* 15MB boundary */
#else
#define SIXTEENMEG (4096*4096)
#endif
#define MAXBKVA 1024
int		maxbkva = MAXBKVA*PAGE_SIZE;

/* special list that can be used at interrupt time for eventual kva free */
static struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
static vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s,i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			bit = ffs(~bounceallocarray[i]);
			if (bit) {
				bounceallocarray[i] |= 1 << (bit - 1) ;
				bouncefree -= count;
				splx(s);
				return bouncepa[(i * BITS_IN_UNSIGNED + (bit - 1))];
			}
		}
	}
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}

static void
vm_bounce_kva_free(addr, size, now)
	vm_offset_t addr;
	vm_offset_t size;
	int now;
{
	int s = splbio();
	kvaf[kvasfreecnt].addr = addr;
	kvaf[kvasfreecnt].size = size;
	++kvasfreecnt;
	if( now) {
		/*
		 * this will do wakeups
		 */
		vm_bounce_kva(0,0);
	} else {
		if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
			wakeup((caddr_t) io_map);
			bmwait = 0;
		}
	}
	splx(s);
}

/*
 * free count bounce buffer pages
 */
static void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!");

	for(index=0;index<bouncepages;index++) {
		if( pa == bouncepa[index])
			break;
	}

	if( index == bouncepages)
		panic("vm_bounce_page_free: invalid bounce buffer");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}

/*
 * allocate size bytes of bounce buffer kva space
 */
static vm_offset_t
vm_bounce_kva(size, waitok)
	int size;
	int waitok;
{
	int i;
	vm_offset_t kva = 0;
	vm_offset_t off;
	int s = splbio();
more:
	if (!bmfreeing && kvasfreecnt) {
		bmfreeing = 1;
		for (i = 0; i < kvasfreecnt; i++) {
			for(off=0;off<kvaf[i].size;off+=PAGE_SIZE) {
				pmap_kremove( kvaf[i].addr + off);
			}
			kmem_free_wakeup(io_map, kvaf[i].addr,
				kvaf[i].size);
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
		if( bmwait) {
			bmwait = 0;
			wakeup( (caddr_t) io_map);
		}
	}

	if( size == 0) {
		splx(s);
		return 0;
	}

	if ((kva = kmem_alloc_pageable(io_map, size)) == 0) {
		if( !waitok) {
			splx(s);
			return 0;
		}
		bmwait = 1;
		tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);
	return kva;
}

/*
 * same as vm_bounce_kva -- but really allocates (takes a page count as its arg)
 */
vm_offset_t
vm_bounce_kva_alloc(count)
int count;
{
	int i;
	vm_offset_t kva;
	vm_offset_t pa;
	if( bouncepages == 0) {
		kva = (vm_offset_t) malloc(count*PAGE_SIZE, M_TEMP, M_WAITOK);
		return kva;
	}
	kva = vm_bounce_kva(count*PAGE_SIZE, 1);
	for(i=0;i<count;i++) {
		pa = vm_bounce_page_find(1);
		pmap_kenter(kva + i * PAGE_SIZE, pa);
	}
	return kva;
}

/*
 * same as vm_bounce_kva_free -- but really free
 */
void
vm_bounce_kva_alloc_free(kva, count)
	vm_offset_t kva;
	int count;
{
	int i;
	vm_offset_t pa;
	if( bouncepages == 0) {
		free((caddr_t) kva, M_TEMP);
		return;
	}
	for(i = 0; i < count; i++) {
		pa = pmap_kextract(kva + i * PAGE_SIZE);
		vm_bounce_page_free(pa, 1);
	}
	vm_bounce_kva_free(kva, count*PAGE_SIZE, 0);
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int i;

	if (bouncepages == 0)
		return;

	if (bp->b_flags & B_BOUNCE) {
		printf("vm_bounce_alloc: called recursively???\n");
		return;
	}

	if (bp->b_bufsize < bp->b_bcount) {
		printf(
		    "vm_bounce_alloc: b_bufsize(0x%lx) < b_bcount(0x%lx) !!\n",
			bp->b_bufsize, bp->b_bcount);
		panic("vm_bounce_alloc");
	}

/*
 *  This is not really necessary
 *	if( bp->b_bufsize != bp->b_bcount) {
 *		printf("size: %d, count: %d\n", bp->b_bufsize, bp->b_bcount);
 *	}
 */


	vastart = (vm_offset_t) bp->b_data;
	vaend = (vm_offset_t) bp->b_data + bp->b_bufsize;

	vapstart = trunc_page(vastart);
	vapend = round_page(vaend);
	countvmpg = (vapend - vapstart) / PAGE_SIZE;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		if( pa == 0)
			panic("vm_bounce_alloc: Unmapped page");
		va += PAGE_SIZE;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg*PAGE_SIZE, 1);
#if 0
	printf("%s: vapstart: %x, vapend: %x, countvmpg: %d, kva: %x ",
		(bp->b_flags & B_READ) ? "read":"write",
			vapstart, vapend, countvmpg, kva);
#endif
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (PAGE_SIZE * i), bpa);
#if 0
			printf("r(%d): (%x,%x,%x) ", i, va, pa, bpa);
#endif
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0) {
				bcopy((caddr_t) va, (caddr_t) kva + (PAGE_SIZE * i), PAGE_SIZE);
			}
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (PAGE_SIZE * i), pa);
		}
		va += PAGE_SIZE;
	}

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_data;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_data = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & PAGE_MASK));
#if 0
	printf("b_savekva: %x, newva: %x\n", bp->b_savekva, bp->b_data);
#endif
	return;
}

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva, bouncekvaend;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

/*
 *  This check is not necessary
 *	if (bp->b_bufsize != bp->b_bcount) {
 *		printf("vm_bounce_free: b_bufsize=%d, b_bcount=%d\n",
 *			bp->b_bufsize, bp->b_bcount);
 *	}
 */

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_data;
/*
	printf("free: %d ", bp->b_bufsize);
*/

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < bp->b_bufsize; ) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ( mybouncepa != pmap_kextract( trunc_page( origkva))) {
			vm_offset_t tocopy = copycount;
			if (i + tocopy > bp->b_bufsize)
				tocopy = bp->b_bufsize - i;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, tocopy);
/*
 * free the bounce allocation
 */

/*
			printf("(kva: %x, pa: %x)", bouncekva, mybouncepa);
*/
			vm_bounce_page_free(mybouncepa, 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		i += copycount;
	}

/*
	printf("\n");
*/
/*
 * add the old kva into the "to free" list
 */

	bouncekva= trunc_page((vm_offset_t) bp->b_data);
	bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize);

/*
	printf("freeva: %d\n", (bouncekvaend - bouncekva) / PAGE_SIZE);
*/
	vm_bounce_kva_free( bouncekva, (bouncekvaend - bouncekva), 0);
	bp->b_data = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}


/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	int i;

	kvasfreecnt = 0;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array");

	bouncepa = malloc(bouncepages * sizeof(vm_offset_t), M_TEMP, M_NOWAIT);
	if (!bouncepa)
		panic("Cannot allocate physical memory array");

	for(i=0;i<bounceallocarraysize;i++) {
		bounceallocarray[i] = 0xffffffff;
	}

	for(i=0;i<bouncepages;i++) {
		vm_offset_t pa;
		if( (pa = pmap_kextract((vm_offset_t) bouncememory + i * PAGE_SIZE)) >= SIXTEENMEG) {
			printf("vm_bounce_init: bounce memory out of range -- bounce disabled\n");
			free(bounceallocarray, M_TEMP);
			bounceallocarray = NULL;
			free(bouncepa, M_TEMP);
			bouncepa = NULL;
			bouncepages = 0;
			break;
		}
		if( pa == 0)
			panic("bounce memory not resident");
		bouncepa[i] = pa;
		bounceallocarray[i/(8*sizeof(int))] &= ~(1<<(i%(8*sizeof(int))));
	}
	bouncefree = bouncepages;

}
#endif /* BOUNCE_BUFFERS */

/*
 * quick version of vm_fault
 */
void
vm_fault_quick(v, prot)
	caddr_t v;
	int prot;
{
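	/*
	 * Reading the byte with fubyte() faults the page in; writing it
	 * back with subyte() additionally forces any copy-on-write to
	 * happen when write access was requested.
	 */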
	if (prot & VM_PROT_WRITE)
		subyte(v, fubyte(v));
	else
		fubyte(v);
}

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	struct pcb *pcb2 = &p2->p_addr->u_pcb;

#if NNPX > 0
	/* Ensure that p1's pcb is up to date. */
	if (npxproc == p1)
		npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
#endif

	/* Copy p1's pcb. */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.
	 */
	p2->p_md.md_regs = (struct trapframe *)
			   ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
	*p2->p_md.md_regs = *p1->p_md.md_regs;

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 */
	pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
	pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
	pcb2->pcb_esi = (int)fork_return;
	pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
	pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
	pcb2->pcb_ebx = (int)p2;
	pcb2->pcb_eip = (int)fork_trampoline;
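	/*
	 * fork_trampoline() treats %esi as the function to call and %ebx as
	 * its argument (see cpu_set_fork_handler() below), so the child
	 * starts by calling fork_return(p2, frame).
	 */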
	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_ldt_len:	cloned above.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above (always 0 here?).
	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
	 */

#ifdef VM86
	/*
	 * XXX don't copy the i/o pages.  this should probably be fixed.
	 */
	pcb2->pcb_ext = 0;
#endif

#ifdef USER_LDT
        /* Copy the LDT, if necessary. */
        if (pcb2->pcb_ldt != 0) {
                union descriptor *new_ldt;
                size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);

                new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
                bcopy(pcb2->pcb_ldt, new_ldt, len);
                pcb2->pcb_ldt = (caddr_t)new_ldt;
        }
#endif

	/*
	 * Now, cpu_switch() can schedule the new process.
	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
	 * will set up a stack to call fork_return(p, frame); to complete
	 * the return to user-mode.
	 */
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(p, func, arg)
	struct proc *p;
	void (*func) __P((void *));
	void *arg;
{
	/*
	 * Note that the trap frame follows the args, so the function
	 * is really called like this:  func(arg, frame);
	 */
	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
}

void
cpu_exit(p)
	register struct proc *p;
{
#if defined(USER_LDT) || defined(VM86)
	struct pcb *pcb = &p->p_addr->u_pcb;
#endif

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
#ifdef VM86
	if (pcb->pcb_ext != 0) {
	        /*
		 * XXX do we need to move the TSS off the allocated pages
		 * before freeing them?  (not done here)
		 */
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
		    ctob(IOPAGES + 1));
		pcb->pcb_ext = 0;
	}
#endif
#ifdef USER_LDT
	if (pcb->pcb_ldt != 0) {
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
	}
#endif
	cnt.v_swtch++;
	cpu_switch(p);
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
	/* drop per-process resources */
	pmap_dispose_proc(p);

	/* and clean-out the vmspace */
	vmspace_free(p->p_vmspace);
}

/*
 * Dump the machine specific header information at the start of a core dump.
 */
int
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

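	/* The "header" written here is simply the process's u-area. */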
	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}

#ifdef notyet
static void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}
#endif

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t va;

	va = pmap_kextract((vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((int)va);
}

/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr, v, kva;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE, v += PAGE_SIZE) {
		/*
		 * Do the vm_fault if needed; do the copy-on-write thing
		 * when reading stuff off device into memory.
		 */
		vm_fault_quick(addr,
			(bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		if (pa == 0)
			panic("vmapbuf: page not present");
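		/*
		 * Hold the page so it cannot be reclaimed while the I/O is
		 * in progress; vunmapbuf() drops the hold.
		 */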
		vm_page_hold(PHYS_TO_VM_PAGE(pa));
		pmap_kenter((vm_offset_t) v, pa);
	}

	kva = bp->b_saveaddr;
	bp->b_saveaddr = bp->b_data;
	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register caddr_t addr;
	vm_offset_t pa;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	for (addr = (caddr_t)trunc_page(bp->b_data);
	    addr < bp->b_data + bp->b_bufsize;
	    addr += PAGE_SIZE) {
		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
		pmap_kremove((vm_offset_t) addr);
		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
	}

	bp->b_data = bp->b_saveaddr;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset()
{

#ifdef PC98
	/*
	 * Attempt to do a CPU reset via CPU reset port.
	 */
	disable_intr();
	outb(0x37, 0x0f);		/* SHUT0 = 0. */
	outb(0x37, 0x0b);		/* SHUT1 = 0. */
	outb(0xf0, 0x00);		/* Reset. */
#else
	/*
	 * Attempt to do a CPU reset via the keyboard controller, but do
	 * not turn off GateA20, as any machine that fails to do the reset
	 * here would then end up in no man's land.
	 */

#if !defined(BROKEN_KEYBOARD_RESET)
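	/*
	 * Command 0xFE tells the 8042 keyboard controller to pulse the
	 * CPU reset line.
	 */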
	outb(IO_KBD + 4, 0xFE);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */
	printf("Keyboard reset did not work, attempting CPU shutdown\n");
	DELAY(1000000);	/* wait 1 sec for printf to complete */
#endif
#endif /* PC98 */
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, PAGE_SIZE);

	/* "good night, sweet prince .... <THUNK!>" */
	invltlb();
	/* NOTREACHED */
	while(1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *	chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	u_int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
	    return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit. This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
		    grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}

static int cnt_prezero;

SYSCTL_INT(_machdep, OID_AUTO, cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");

/*
 * Implement the pre-zeroed page mechanism.
 * This routine is called from the idle loop.
 */
int
vm_page_zero_idle()
{
	static int free_rover;
	vm_page_t m;
	int s;

	/*
	 * XXX
	 * We stop zeroing pages when there are sufficient prezeroed pages.
	 * This threshold isn't really needed, except we want to
	 * bypass unneeded calls to vm_page_list_find, and the
	 * associated cache flush and latency.  The pre-zero will
	 * still be called when there are significantly more
	 * non-prezeroed pages than zeroed pages.  The threshold
	 * of half the number of reserved pages is arbitrary, but
	 * approximately the right amount.  Eventually, we should
	 * perhaps interrupt the zero operation when a process
	 * is found to be ready to run.
	 */
	if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
		return (0);
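	/*
	 * try_mplock() acquires the giant MP lock only if it is currently
	 * free; if another CPU holds it, skip this round of zeroing.
	 */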
#ifdef SMP
	if (try_mplock()) {
#endif
		s = splvm();
		__asm __volatile("sti" : : : "memory");
		m = vm_page_list_find(PQ_FREE, free_rover);
		if (m != NULL) {
			--(*vm_page_queues[m->queue].lcnt);
			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
			m->queue = PQ_NONE;
			splx(s);
#if 0
			rel_mplock();
#endif
			pmap_zero_page(VM_PAGE_TO_PHYS(m));
#if 0
			get_mplock();
#endif
			(void)splvm();
			m->queue = PQ_ZERO + m->pc;
			++(*vm_page_queues[m->queue].lcnt);
			TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m,
			    pageq);
			free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
			++vm_page_zero_count;
			++cnt_prezero;
		}
		splx(s);
		__asm __volatile("cli" : : : "memory");
#ifdef SMP
		rel_mplock();
#endif
		return (1);
#ifdef SMP
	}
#endif
	return (0);
}

/*
 * Software interrupt handler for queued VM system processing.
 */
void
swi_vm()
{
	if (busdma_swi_pending != 0)
		busdma_swi();
}

/*
 * Tell whether this address is in some physical memory region.
 * Currently used by the kernel coredump code in order to avoid
 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
 * or other unpredictable behaviour.
 */

#include "isa.h"

int
is_physical_memory(addr)
	vm_offset_t addr;
{

#if NISA > 0
	/* The ISA ``memory hole''. */
	if (addr >= 0xa0000 && addr < 0x100000)
		return 0;
#endif

	/*
	 * stuff other tests for known memory-mapped devices (PCI?)
	 * here
	 */

	return 1;
}
